From 78ab7cd446195ff52b4d4aab44799f49fbcfdba9 Mon Sep 17 00:00:00 2001 From: Rob Carnell Date: Sun, 4 Feb 2024 15:54:21 -0500 Subject: [PATCH] Separated out numerical mean and variance from analytic Added tests rewrote equations in vignette --- R/mle.R | 92 ++++++++++- tests/testthat/helper-mle-utils.R | 4 + tests/testthat/test-mle-utils.R | 40 +---- tests/testthat/test-mle.R | 37 ++++- vignettes/triangledistributionmath.Rmd | 219 +++++++++++++++++++------ 5 files changed, 290 insertions(+), 102 deletions(-) create mode 100644 tests/testthat/helper-mle-utils.R diff --git a/R/mle.R b/R/mle.R index 92d9701..0205d29 100644 --- a/R/mle.R +++ b/R/mle.R @@ -365,16 +365,34 @@ f_rth_order_stat <- function(x, n, r, a, b, c) #' #' @return the expected value #' -#' @importFrom stats integrate #' @examples #' mean_rth_order_stat(10, 5, 0, 1, 0.5) mean_rth_order_stat <- function(n, r, a, b, c) { - integrand <- function(x, n, r, a, b, c) {x * f_rth_order_stat(x, n, r, a, b, c)} - stats::integrate(integrand, lower = a, upper = b, n = n, r = r, a = a, b = b, c = c) + # n <- 200 + # r <- 100 + # a <- 0 + # b <- 1 + # c <- 0.5 + if (n > 10) { + return(mean_rth_order_stat_numeric(n, r, a, b, c)) + } else { + coefs1 <- sapply(0:(n-r), function(k) { + choose(n-r, k) * (b-a)^(k-n) * (c-a)^{n-k} * (-1)^(n-r-k) * + (c/(n-k) - (c-a)/(n-k)/(2*n-2*k+1)) + }) + + coefs2 <- sapply(0:(r-1), function(k) { + choose(r-1, k) * (b-a)^(k-n) * (c-b)^{n-k} * + (-c/(n-k) + (c-b)/(n-k)/(2*n-2*k+1)) + }) + + return(ifelse(c > a, r*choose(n, r)*sum(coefs1), 0) + + ifelse(c < b, r*choose(n, r)*(-1)^(n-r)*sum(coefs2), 0)) + } } -#' Variance of the rth order statistic +#' Expected value of the rth order statistic using numeric integration #' #' @noRd #' @@ -388,13 +406,67 @@ mean_rth_order_stat <- function(n, r, a, b, c) #' #' @importFrom stats integrate #' @examples +#' mean_rth_order_stat_numeric(10, 5, 0, 1, 0.5) +mean_rth_order_stat_numeric <- function(n, r, a, b, c) +{ + integrand <- function(x, n, r, a, b, c) {x * f_rth_order_stat(x, n, r, a, b, c)} + stats::integrate(integrand, lower = a, upper = b, n = n, r = r, a = a, b = b, c = c)$value +} + +#' Variance of the rth order statistic +#' +#' @noRd +#' +#' @param n number of order statistics +#' @param r the order statistic number +#' @param a the minimum support of the triangle distribution \code{a < b}, \code{a <= min(z)} +#' @param b the maximum support of the triangle distribution \code{b >= max(z)} +#' @param c the mode of the triangle distribution +#' +#' @return the variance +#' +#' @examples #' variance_rth_order_stat(10, 5, 0, 1, 0.5) variance_rth_order_stat <- function(n, r, a, b, c) +{ + if (n > 10) { + variance_rth_order_stat_numeric(n, r, a, b, c) + } else { + coefs1 <- sapply(0:(n-r), function(k) { + choose(n-r, k) * (b-a)^(k-n) * (c-a)^(n-k) * (-1)^(n-r-k) / (n-k) * + (c^2 - 2*c*(c-a)/(2*n-2*k+1) + 2*(c-a)^2/(2*n-2*k+1)/(2*n-2*k+2)) + }) + coefs2 <- sapply(0:(r-1), function(k) { + choose(r-1, k) * (b-a)^(k-n) * (c-b)^(n-k)/(n-k) * + (-c^2 + 2*c*(c-b)/(2*n-2*k+1) - 2*(c-b)^2/(2*n-2*k+1)/(2*n-2*k+2)) + }) + m <- mean_rth_order_stat(n, r, a, b, c) + return(ifelse(c > a, r*choose(n, r)*sum(coefs1), 0) + + ifelse(c < b, r*choose(n, r)*(-1)^(n-r)*sum(coefs2), 0) - m^2) + } +} + +#' Variance of the rth order statistic using numeric integration +#' +#' @noRd +#' +#' @param n number of order statistics +#' @param r the order statistic number +#' @param a the minimum support of the triangle distribution \code{a < b}, \code{a <= min(z)} +#' @param b the maximum support of 
the triangle distribution \code{b >= max(z)} +#' @param c the mode of the triangle distribution +#' +#' @return the variance +#' +#' @importFrom stats integrate +#' @examples +#' variance_rth_order_stat_numeric(10, 5, 0, 1, 0.5) +variance_rth_order_stat_numeric <- function(n, r, a, b, c) { integrand <- function(x, n, r, a, b, c) {x * x * f_rth_order_stat(x, n, r, a, b, c)} E_x2 <- stats::integrate(integrand, lower = a, upper = b, n = n, r = r, a = a, b = b, c = c) - E_x <- mean_rth_order_stat(n, r, a, b, c) - E_x2$value - E_x$value * E_x$value + E_x <- mean_rth_order_stat_numeric(n, r, a, b, c) + E_x2$value - E_x * E_x } #' Maximum likelihood estimate of the triangle distribution parameters @@ -450,6 +522,14 @@ triangle_mle <- function(x, debug = FALSE, maxiter = 100) } var_chat <- variance_rth_order_stat(length(x), mle_c2$r_hat, mle_ab$a, mle_ab$b, mle_c2$c_hat) + if (any(var_chat < 0) | debug) { + cat("\nNegative Varince in c hat\n") + cat("n=", length(x), "\n") + cat("r=", mle_c2$r_hat, "\n") + cat("a=", mle_ab$a, "\n") + cat("b=", mle_ab$b, "\n") + cat("c=", mle_c2$c_hat, "\n") + } vcov <- rbind(cbind(solve(mle_ab$hessian_ab), c(0, 0)), c(0, 0, var_chat)) dimnames(vcov) <- list(c("a", "b", "c"), c("a", "b", "c")) diff --git a/tests/testthat/helper-mle-utils.R b/tests/testthat/helper-mle-utils.R new file mode 100644 index 0000000..17306aa --- /dev/null +++ b/tests/testthat/helper-mle-utils.R @@ -0,0 +1,4 @@ +set.seed(39854) +xtest <- rtriangle(200, 0, 1, 0.5) + +mle1 <- triangle_mle(xtest) diff --git a/tests/testthat/test-mle-utils.R b/tests/testthat/test-mle-utils.R index beee7b1..5a990bd 100644 --- a/tests/testthat/test-mle-utils.R +++ b/tests/testthat/test-mle-utils.R @@ -1,8 +1,4 @@ test_that("summary works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) summ <- summary(mle1) expect_equal(summ@coef[,1], coef(mle1)) @@ -13,28 +9,16 @@ test_that("summary works", { }) test_that("print works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) expect_output(print(mle1)) }) test_that("coef works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) cf <- coef(mle1) expect_equal(mle1$coef, cf) }) test_that("logLik works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) ll <- logLik(mle1) expect_equivalent(-1*nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]), @@ -44,10 +28,6 @@ test_that("logLik works", { }) test_that("AIC works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) aic <- AIC(mle1) expect_equivalent(2 * nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]) + 2 * 3, @@ -55,10 +35,6 @@ test_that("AIC works", { }) test_that("BIC works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) bic <- BIC(mle1) expect_equivalent(2 * nLL_triangle(xtest, mle1$coef[1], mle1$coef[2], mle1$coef[3]) + log(length(xtest)) * 3, @@ -66,28 +42,20 @@ test_that("BIC works", { }) test_that("vcov works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) v <- vcov(mle1) expect_equivalent(mle1$vcov, v) }) test_that("profile works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) prof <- profile(mle1) expect_equivalent("profile.mle", class(prof)) expect_equal(3, length(prof@profile)) expect_equal(3, ncol(prof@profile$a$par.vals)) - mle1 <- standard_triangle_mle(xtest) - 
prof <- profile(mle1) + mle2 <- standard_triangle_mle(xtest) + prof <- profile(mle2) expect_equivalent("profile.mle", class(prof)) expect_equal(3, length(prof@profile)) @@ -95,10 +63,6 @@ test_that("profile works", { }) test_that("confint works", { - set.seed(39854) - xtest <- rtriangle(200, 0, 1, 0.5) - - mle1 <- triangle_mle(xtest) cfi <- confint(mle1, level = 0.95) expect_equal(c(3,2), dim(cfi)) diff --git a/tests/testthat/test-mle.R b/tests/testthat/test-mle.R index 9314738..4d0342a 100644 --- a/tests/testthat/test-mle.R +++ b/tests/testthat/test-mle.R @@ -95,15 +95,31 @@ test_that("f_rth_order_stat works", { test_that("mean_rth_order_stat works", { temp <- mean_rth_order_stat(10, 5, 0, 1, 0.5) - expect_true(temp$value >= 0 & temp$value <= 1) - expect_true(temp$value < 0.5) + expect_true(temp >= 0 & temp <= 1) + expect_true(temp < 0.5) + temp <- mean_rth_order_stat(10, 6, 0, 1, 0.5) - expect_true(temp$value > 0.5) + expect_true(temp > 0.5) temp <- mean_rth_order_stat(10, 1, 0, 1, 0.5) - expect_true(temp$value >= 0 & temp$value <= 1) + expect_true(temp >= 0 & temp <= 1) + temp <- mean_rth_order_stat(10, 10, 0, 1, 0.5) - expect_true(temp$value >= 0 & temp$value <= 1) + expect_true(temp >= 0 & temp <= 1) + + expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 2), + mean_rth_order_stat(10, 5, 1, 3, 2), tolerance = 1E-6) + expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 1), + mean_rth_order_stat(10, 5, 1, 3, 1), tolerance = 1E-6) + expect_equal(mean_rth_order_stat_numeric(10, 5, 1, 3, 3), + mean_rth_order_stat(10, 5, 1, 3, 3), tolerance = 1E-6) + expect_equal(mean_rth_order_stat_numeric(10, 1, 1, 3, 2), + mean_rth_order_stat(10, 1, 1, 3, 2), tolerance = 1E-6) + expect_equal(mean_rth_order_stat_numeric(10, 10, 1, 3, 2), + mean_rth_order_stat(10, 10, 1, 3, 2), tolerance = 1E-4) ###### + + expect_true(mean_rth_order_stat(200, 100, 0, 1, 0.5) < 1) + expect_true(mean_rth_order_stat(200, 100, 0, 1, 0.5) > 0) }) test_that("variance_rth_order_stat works", { @@ -113,6 +129,17 @@ test_that("variance_rth_order_stat works", { expect_true(variance_rth_order_stat(10, 8, 0, 1, 0.5) >= 0) expect_true(variance_rth_order_stat(10, 9, 0, 1, 0.5) >= 0) expect_true(variance_rth_order_stat(10, 10, 0, 1, 0.5) >= 0) + + expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 2), + variance_rth_order_stat(10, 5, 1, 3, 2), tolerance = 1E-6) + expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 1), + variance_rth_order_stat(10, 5, 1, 3, 1), tolerance = 1E-6) + expect_equal(variance_rth_order_stat_numeric(10, 5, 1, 3, 3), + variance_rth_order_stat(10, 5, 1, 3, 3), tolerance = 1E-6) + expect_equal(variance_rth_order_stat_numeric(10, 1, 1, 3, 2), + variance_rth_order_stat(10, 1, 1, 3, 2), tolerance = 1E-6) + expect_equal(variance_rth_order_stat_numeric(10, 10, 1, 3, 2), + variance_rth_order_stat(10, 10, 1, 3, 2), tolerance = 1E-3) #### }) test_that("triangle_mle works", { diff --git a/vignettes/triangledistributionmath.Rmd b/vignettes/triangledistributionmath.Rmd index bc6b199..cc0c76e 100644 --- a/vignettes/triangledistributionmath.Rmd +++ b/vignettes/triangledistributionmath.Rmd @@ -64,22 +64,36 @@ $$f(x) = \end{array} \right.\ \ \ \ (3)$$ -## Triangle Mean +## Triangle Distribution Function -Integrating equation (3) to find $E(x)$, +$$F_{x \le c}(x) = \int_{-\infty}^{x}f(t)dt = \int_a^x \frac{2(t-a)}{(b-a)(c-a)} dt$$ -$$E(X)=\int xf(x)dx = \frac{h}{c-a}\int_a^c (x^2-ax) dx + -\frac{h}{c-b}\int_c^b (x^2-bx) dx$$ +$$ = \frac{(x-a)^2}{(b-a)(c-a)}$$ -$$=\frac{a+b+c}{3}\ \ \ \ (4)$$ +$$F_{x \gt c}(x) = 1 - 
\int_x^b \frac{2(t-b)}{(b-a)(c-b)} dt$$ +$$ = 1 + \frac{(x - b)^2}{(b-a)(c-b)}$$ -## Triangle Variance +## Triangle Mean -$$V(X) = E(X^2) - \big(E(X)\big)^2 = \int x^2f(x)dx- \bigg(\frac{a+b+c}{3}\bigg)^2$$ +Integrating equation (3) to find $E(x)$, -$$=\frac{h}{c-a}\int_{a}^{c} x^2(x-a) dx + \frac{h}{c-b} \int_{c}^{b} x^2(x-b) dx- \bigg(\frac{a+b+c}{3}\bigg)^2$$ +$$ +\begin{align} +E(X) &= \int xf(x)dx = \frac{2}{(b-a)(c-a)}\int_a^c (x^2-ax) dx + +\frac{2}{(b-a)(c-b)}\int_c^b (x^2-bx) dx \\ +&= \frac{a+b+c}{3}\ \ \ \ (4) +\end{align} +$$ + +## Triangle Variance -$$=\frac{a^2+b^2+c^2-ab-ac-bc}{18}$$ +$$ +\begin{align} +V(X) &= E(X^2) - \big(E(X)\big)^2 = \int x^2f(x)dx- \bigg(\frac{a+b+c}{3}\bigg)^2 \\ +&= \frac{2}{(b-a)(c-a)}\int_{a}^{c} x^2(x-a) dx + \frac{2}{(b-a)(c-b)} \int_{c}^{b} x^2(x-b) dx- \bigg(\frac{a+b+c}{3}\bigg)^2 \\ +&= \frac{a^2+b^2+c^2-ab-ac-bc}{18} +\end{align} +$$ ## Logarithmic Triangle distribution @@ -129,30 +143,41 @@ $$\beta_2=\frac{2}{(c_l-b_l)(b_l-a_l)}$$ Finding the CDF, -$$G(y)=\int_{-\infty}^y g(y)dy$$ - -$$\mbox{for}\ a \leq y \leq c,\ \ G(y) = \frac{\beta_1}{\log({\phi})} \int_a^y \frac{\log(y)}{y\log({\phi})}-\frac{a_l}{y}dy$$ +$$G(y) = \int_{-\infty}^y g(y)dy$$ -$$=\beta_1 \bigg[\frac{\log_{\phi}^2(y)}{2} - a_l \log_{\phi}(y) - \frac{a_l^2}{2} + a_l^2\bigg]$$ - -$$\mbox{for}\ c < y \leq b,\ \ G(y) = G(c) + \frac{\beta_2}{\log({\phi})} \int_c^y \frac{\log(y)}{y\log({\phi})} - \frac{b_l}{y}dy$$ +$$ +\begin{align} +\mbox{for}\ a \leq y \leq c,\ \ G(y) &= \frac{\beta_1}{\log({\phi})} \int_a^y \frac{\log(y)}{y\log({\phi})}-\frac{a_l}{y}dy \\ +&=\beta_1 \bigg[\frac{\log_{\phi}^2(y)}{2} - a_l \log_{\phi}(y) - \frac{a_l^2}{2} + a_l^2\bigg] +\end{align} +$$ -$$=G(c) + \beta_2 \bigg[\frac{\log_{\phi}^2(y)}{2} - b_l \log_{\phi}(y) - \frac{c_l^2}{2} + b_l c_l\bigg]$$ +$$ +\begin{align} +\mbox{for}\ c < y \leq b,\ \ G(y) &= G(c) + \frac{\beta_2}{\log({\phi})} \int_c^y \frac{\log(y)}{y\log({\phi})} - \frac{b_l}{y}dy \\ +&=G(c) + \beta_2 \bigg[\frac{\log_{\phi}^2(y)}{2} - b_l \log_{\phi}(y) - \frac{c_l^2}{2} + b_l c_l\bigg] +\end{align} +$$ Checking that the CDF is 1 at b, -$$G(b) = \frac{c_l^2 - 2a_l c_l + a_l^2}{(c_l-a_l)(b_l-a_l)} + \frac{-b_l^2-c_l^2+2b_lc_l}{(c_l-b_l)(b_l-a_l)}$$ - -$$= \frac{c_l-a_l}{b_l-a_l} + \frac{-(c_l-b_l)}{b_l-a_l} = 1$$ +$$ +\begin{align} +G(b) &= \frac{c_l^2 - 2a_l c_l + a_l^2}{(c_l-a_l)(b_l-a_l)} + \frac{-b_l^2-c_l^2+2b_lc_l}{(c_l-b_l)(b_l-a_l)} \\ +&= \frac{c_l-a_l}{b_l-a_l} + \frac{-(c_l-b_l)}{b_l-a_l} = 1 +\end{align} +$$ Now calculating $E(y)$, -$$E(y) = \int y\ g(y)\ dy$$ - -$$=\frac{\beta_1}{\log({\phi})} \int_a^c \bigg[\frac{\log(y)}{\log({\phi})} - a_l\bigg]dy + -\frac{\beta_2}{\log({\phi})} \int_c^b \bigg[\frac{\log(y)}{\log({\phi})} - b_l\bigg]dy$$ - -$$=\frac{c\beta_1}{\log^2({\phi})} \bigg[\log(c) - 1 - \log(a) + \frac{a}{c} \bigg] + \frac{c\beta_2}{\log^2({\phi})} \bigg[\frac{-b}{c} - \log(c) + 1 + \log(b) \bigg]$$ +$$ +\begin{align} +E(y) &= \int y\ g(y)\ dy \\ +&=\frac{\beta_1}{\log({\phi})} \int_a^c \bigg[\frac{\log(y)}{\log({\phi})} - a_l\bigg]dy + +\frac{\beta_2}{\log({\phi})} \int_c^b \bigg[\frac{\log(y)}{\log({\phi})} - b_l\bigg]dy \\ +&=\frac{c\beta_1}{\log^2({\phi})} \bigg[\log(c) - 1 - \log(a) + \frac{a}{c} \bigg] + \frac{c\beta_2}{\log^2({\phi})} \bigg[\frac{-b}{c} - \log(c) + 1 + \log(b) \bigg] +\end{align} +$$ ## Method of Moments Estimation @@ -201,7 +226,7 @@ the $r^{th}$ and $r+1$ order statistics. 
For notation purposes, also define $X_ $$\large \max_{0 \le c \le 1} L(x|c) = \max_{r \ \epsilon \ (0,\dots,n)} \ \ \max_{x_{(r)} \le c \le x_{(r+1)}} \ \ L(x|c)$$ -#### Case 1. $c$ is between the first and second to last order statistic $r \ \epsilon \ (1, \dots, n-1)$ +#### Case 1: $c$ is between the first and second to last order statistic $r \ \epsilon \ (1, \dots, n-1)$ Noticing that maximizing the likelihood is equivalent to minimizing the denominator: @@ -221,7 +246,7 @@ $$\frac{dz}{dc} = rc^{(r-1)}(1-c)^{n-r} + c^r(n-r)(1-c)^{n-r-1}(-1) = c^{(r-1)}( $\frac{dz}{dc} = 0$ at $c=0,\ 1,\ \frac{r}{n}$. At $0 < c < \frac{r}{n}$, $z$ is positive, and at $\frac{r}{n} < c < 1$, $z$ is negative. Therefore, $z$ is unimodal on $(0,1)$. -#### Case 2. $c$ is between 0 and the first order statistic $r = 0$ +#### Case 2: $c$ is between 0 and the first order statistic $r = 0$ $$\large \max L(x|c) = \max_{0 \le c \le x_{(1)}} \prod_{i=1}^{n} \frac{1-x_{(i)}}{1-c} = \prod_{i=1}^{n} \frac{1-x_{(i)}}{1-x_{(1)}}$$ @@ -229,7 +254,7 @@ Choosing the largest endpoint in the interval, creates the smallest denominator, Therefore, for this case, it is sufficient to test the likelihood using $c$ at the first sampled point. -#### Case 3. $c$ is between the last order statistic $r = n$ and 1 +#### Case 3: $c$ is between the last order statistic $r = n$ and 1 $$\large \max L(x|c) = \max_{x_{(n)} \le c \le 1} \prod_{i=1}^{n} \frac{x_{(i)}}{c} = \prod_{i=1}^{n} \frac{x_{(i)}}{x_{(n)}}$$ @@ -245,48 +270,60 @@ so the test must be performed iteratively as $a$ and $b$ are separately optimize ### Negative Log Likelihood -$$nLL = -\log(L) = -\log\left(\prod_i^n f(x_i)\right)$$ - -$$ = - \sum_i^n \log\left(f(x_i)\right) = - \sum_{i: \ a \le x_i \lt c}^{n_1} \log\left(f(x_i)\right) - \sum_{i: \ c \le x_i \le b}^{n_2} \log\left(f(x_i)\right)$$ +$$ +\begin{align} +nLL &= -\log(L) = -\log\left(\prod_i^n f(x_i)\right) \\ +&= - \sum_i^n \log\left(f(x_i)\right) = - \sum_{i: \ a \le x_i \lt c}^{n_1} \log\left(f(x_i)\right) - \sum_{i: \ c \le x_i \le b}^{n_2} \log\left(f(x_i)\right) +\end{align} +$$ where $n = n_1 + n_2$ -#### Case 1 $a = c \lt b$ +#### Case 1: $a = c \lt b$ -$$ nLL = - \sum_{i}^{n} \log(2) + \log(b-x_i) - \log(b-a) - \log(b-c)$$ - -$$ = -n\log(2) + n\log(b-a) + n \log(b-c) - \sum_{i}^{n} \log(b-x_i)$$ - -#### Case 2 $a \lt c = b$ - -$$ nLL = - \sum_{i}^{n} \log(2) + \log(x_i - a) - \log(b-a) - \log(c-a) $$ +$$ +\begin{align} +nLL &= - \sum_{i}^{n} \log(2) + \log(b-x_i) - \log(b-a) - \log(b-c) \\ +&= -n\log(2) + n\log(b-a) + n \log(b-c) - \sum_{i}^{n} \log(b-x_i) +\end{align} +$$ -$$ = -n\log(2) + n\log(b-a) + n\log(c-a) - \sum_{i}^{n} \log(x_i - a)$$ +#### Case 2: $a \lt c = b$ -#### Case 3 a \lt c \lt b +$$ +\begin{align} +nLL &= - \sum_{i}^{n} \log(2) + \log(x_i - a) - \log(b-a) - \log(c-a) \\ +&= -n\log(2) + n\log(b-a) + n\log(c-a) - \sum_{i}^{n} \log(x_i - a) +\end{align} +$$ -$$ nLL = - \sum_{i: \ a \lt x_i \lt c}^{n_1} \log(2) + \log(x_i - a) - \log(b-a) - \log(c-a) - \sum_{i: \ c \le x_i \lt b}^{n_2} \log(2) + \log(b-x_i) - \log(b-a) - \log(b-c)$$ +#### Case 3: $a \lt c \lt b$ -$$ = -n\log(2) + n\log(b-a) + n_1\log(c-a) + n_2 \log(b-c) - \sum_{i: \ a \lt x_i \lt c}^{n_1} \log(x_i - a) - \sum_{i: \ c \le x_i \lt b}^{n_2} \log(b-x_i)$$ +$$ +\begin{align} +nLL &= - \sum_{i: \ a \lt x_i \lt c}^{n_1} \log(2) + \log(x_i - a) - \log(b-a) - \log(c-a) - \sum_{i: \ c \le x_i \lt b}^{n_2} \log(2) + \log(b-x_i) - \log(b-a) - \log(b-c) \\ +&= -n\log(2) + n\log(b-a) + n_1\log(c-a) + n_2 \log(b-c) - \sum_{i: \ a \lt x_i 
\lt c}^{n_1} \log(x_i - a) - \sum_{i: \ c \le x_i \lt b}^{n_2} \log(b-x_i) +\end{align} +$$ ### Gradient of the negative Log Likelihood Given $c$: The negative log likelihood is not differentiable with respect to $c$ because the limits of the sum ($n_1$ and $n_2$) are functions of $c$. Therefore the gradient and hessian are derived as if $c$ is fixed. -#### Case 1 $a = c \lt b$ +#### Case 1: $a = c \lt b$ $$\frac{\partial nLL}{\partial a} = - \frac{n}{b-a}$$ $$\frac{\partial nLL}{\partial b} = \frac{n}{b-a} + \frac{n}{b-c} - \sum_i^{n} \frac{1}{b-x_i}$$ -#### Case 2 $a \lt c = b$ +#### Case 2: $a \lt c = b$ $$\frac{\partial nLL}{\partial a} = - \frac{n}{b-a} - \frac{n}{c-a} + \sum_i^{n} \frac{1}{x_i - a}$$ $$\frac{\partial nLL}{\partial b} = \frac{n}{b-a}$$ -#### Case 3 $a \lt c \lt b$ +#### Case 3: $a \lt c \lt b$ $$\frac{\partial nLL}{\partial a} = - \frac{n}{b-a} - \frac{n_1}{c-a} + \sum_i^{n_1} \frac{1}{x_i - a}$$ @@ -294,7 +331,7 @@ $$\frac{\partial nLL}{\partial b} = \frac{n}{b-a} + \frac{n_2}{b-c} - \sum_i^{n_ ### Hessian of the negative Log Likelihood Given $c$: -#### Case 1 $a = c \lt b$ +#### Case 1: $a = c \lt b$ $$\frac{\partial^2nLL}{\partial a^2} = - \frac{n}{(b-a)^2}$$ @@ -302,7 +339,7 @@ $$\frac{\partial^2 nLL}{\partial b^2} = -\frac{n}{(b-a)^2} - \frac{n}{(b-c)^2} + $$\frac{\partial^2 nLL}{\partial a\partial b} = \frac{\partial^2 nLL}{\partial b\partial a} = - \frac{n}{(b-a)^2}$$ -#### Case 2 $a \lt c = b$ +#### Case 2: $a \lt c = b$ $$\frac{\partial^2 nLL}{\partial a^2} = - \frac{n}{(b-a)^2} - \frac{n}{(c-a)^2} + \sum_i^{n} \frac{1}{(x_i - a)^2}$$ @@ -310,7 +347,7 @@ $$\frac{\partial^2 nLL}{\partial b^2} = - \frac{n}{(b-a)^2}$$ $$\frac{\partial^2 nLL}{\partial a\partial b} = \frac{\partial^2 nLL}{\partial b\partial a} = - \frac{n}{(b-a)^2}$$ -#### Case 3 $a \lt c \lt b$ +#### Case 3: $a \lt c \lt b$ $$\frac{\partial^2 nLL}{\partial a^2} = - \frac{n}{(b-a)^2} - \frac{n_1}{(c-a)^2} + \sum_i^{n_1} \frac{1}{(x_i - a)^2}$$ @@ -341,11 +378,87 @@ $$ #### $r^{th}$ order statistic -$$f(x_{(r)}) = \frac{n!}{(r-1)!(n-r)!} f(x) [F(x)]^{(r-1)}[1-F(x)]^{(n-r)}$$ +$$f(x_{(r)}) = r {n \choose r} f(x) [F(x)]^{r-1}[1-F(x)]^{n-r}$$ + +#### Expected value of the $r^{th}$ order statistic + +$$ +\begin{align} +E(X_{(r)}) &= \int x f(x_{(r)}) dx \\ +&= \int_a^c xr {n \choose r} \frac{2(x-a)}{(b-a)(c-a)} \left(\frac{(x-a)^2}{(b-a)(c-a)}\right)^{r-1}\left(1 - \frac{(x-a)^2}{(b-a)(c-a)}\right)^{n-r}dx \\ +&+ \int_c^b xr {n \choose r} \frac{2(x-b)}{(b-a)(c-b)} \left(1+\frac{(x-b)^2}{(b-a)(c-b)}\right)^{r-1}\left(- \frac{(x-b)^2}{(b-a)(c-b)}\right)^{n-r}dx +\end{align} +$$ + +To clean up the notation a little, define: + +$$\gamma_0 = 2r {n \choose r}$$ + +$$\gamma_1 = (b-a)(c-a)$$ + +$$\gamma_2 = (b-a)(c-b)$$ + +Continuing: + +$$E(X_{(r)}) = \int_a^c \frac{\gamma_0}{\gamma_1^n} x(x-a)^{2r-1} \left(\gamma_1 - (x-a)^2\right)^{n-r}dx + \int_c^b \frac{\gamma_0}{\gamma_2^n} (-1)^{n-r}x(x-b)^{2n-2r+1} \left(\gamma_2 + (x-b)^2\right)^{r-1}dx$$ + +By using a binomial expansion, we can prevent having to integrate by parts multiple times. 
+ +$$(a+b)^n = \sum_{k=0}^n {n \choose k} a^kb^{n-k}$$ + +$$ +\begin{align} +E(X_{(r)}) &= \int_a^c \frac{\gamma_0}{\gamma_1^n} x(x-a)^{2r-1} \sum_{k=0}^{n-r} {n-r \choose k} \gamma_1^k (-1)^{n-r-k}(x-a)^{2n-2r-2k}dx \\ +&+ \int_c^b \frac{\gamma_0}{\gamma_2^n} (-1)^{n-r}x(x-b)^{2n-2r+1} \sum_{k=0}^{r-1} {r-1 \choose k} \gamma_2^k (x-b)^{2r-2-2k}dx +\end{align} +$$ + +$$ +\begin{align} +E(X_{(r)}) &= \frac{\gamma_0}{\gamma_1^n} \sum_{k=0}^{n-r} {n-r \choose k} \gamma_1^k (-1)^{n-r-k}\int_a^c x(x-a)^{2n-2k-1}dx \\ +&+ \frac{\gamma_0}{\gamma_2^n} (-1)^{(n-r)} \sum_{k=0}^{r-1} {r-1 \choose k} \gamma_2^k \int_c^b x(x-b)^{2n-2k-1}dx +\end{align} +$$ + +$$ +\begin{align} +E(X_{(r)}) &= \gamma_0 \sum_{k=0}^{n-r} {n-r \choose k} \gamma_1^{k-n} (-1)^{n-r-k} \left[\frac{c(c-a)^{2n-2k}}{2n-2k} - \frac{(c-a)^{2n-2k+1}}{(2n-2k)(2n-2k+1)}\right] \\ +&+ \gamma_0 (-1)^{n-r} \sum_{k=0}^{r-1} {r-1 \choose k} \gamma_2^{k-n} \left[\frac{-c(c-b)^{2n-2k}}{2n-2k}+\frac{(c-b)^{2n-2k+1}}{(2n-2k)(2n-2k+1)}\right] +\end{align} +$$ + +$$ +\begin{align} +E(X_{(r)}) &= r {n \choose r} \sum_{k=0}^{n-r} {n-r \choose k} (b-a)^{k-n} (c-a)^{n-k} (-1)^{n-r-k} \left[\frac{c}{n-k} - \frac{c-a}{(n-k)(2n-2k+1)}\right] \\ +&+ r {n \choose r} (-1)^{n-r} \sum_{k=0}^{r-1} {r-1 \choose k} (b-a)^{k-n} (c-b)^{n-k} \left[\frac{-c}{n-k}+\frac{c-b}{(n-k)(2n-2k+1)}\right] +\end{align} +$$ + +#### Expected Value of $r^{th}$ order statistic squared + +$$ +\begin{align} +E(X_{(r)}^2) &= \frac{\gamma_0}{\gamma_1^n} \sum_{k=0}^{n-r} {n-r \choose k} \gamma_1^k (-1)^{n-r-k}\int_a^c x^2(x-a)^{2n-2k-1}dx \\ +&+ \frac{\gamma_0}{\gamma_2^n} (-1)^{(n-r)} \sum_{k=0}^{r-1} {r-1 \choose k} \gamma_2^k \int_c^b x^2(x-b)^{2n-2k-1}dx +\end{align} +$$ + +$$ +\begin{align} +E(X_{(r)}^2) &= \gamma_0 \sum_{k=0}^{n-r} {n-r \choose k} \gamma_1^{k-n} (-1)^{n-r-k} \frac{(c-a)^{2n-2k}}{2n-2k}\left[c^2 - \frac{2c(c-a)}{2n-2k+1} + \frac{2(c-a)^2}{(2n-2k+1)(2n-2k+2)}\right] \\ +&+ \gamma_0 (-1)^{(n-r)} \sum_{k=0}^{r-1} {r-1 \choose k} \gamma_2^{k-n} \frac{(c-b)^{2n-2k}}{2n-2k}\left[-c^2 + \frac{2c(c-b)}{2n-2k+1}-\frac{2(c-b)^2}{(2n-2k+1)(2n-2k+2)}\right] +\end{align} +$$ + +$$ +\begin{align} +E(X_{(r)}^2) &= r {n \choose r} \sum_{k=0}^{n-r} {n-r \choose k} (b-a)^{k-n} (c-a)^{n-k} (-1)^{n-r-k} \frac{1}{n-k}\left[c^2 - \frac{2c(c-a)}{2n-2k+1} + \frac{2(c-a)^2}{(2n-2k+1)(2n-2k+2)}\right] \\ +&+ r {n \choose r} (-1)^{(n-r)} \sum_{k=0}^{r-1} {r-1 \choose k} (b-a)^{k-n} (c-b)^{n-k} \frac{1}{n-k}\left[-c^2 + \frac{2c(c-b)}{2n-2k+1}-\frac{2(c-b)^2}{(2n-2k+1)(2n-2k+2)}\right] +\end{align} +$$ -A closed form solution to $V(X_{(r)})$ is not easily obtainable for the triangle, -so numerical integration is used with $f(x)$ as `dtriangle` and $F(x)$ as `ptriangle`. +#### Variance of the $r^{th}$ order statistic -$$V\left(X_{(r)}\right) = \int_a^b x^2 f(x_{(r)}) dx - \left[\int_a^b x f(x_{(r)}) dx \right]^2$$ +$$V\left(X_{(r)}\right) = E(X_{(r)}^2) - \left[E(X_{(r)})\right]^2$$
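
As a quick sanity check on the closed form (added here for illustration, not part of the original derivation), setting $n = r = 1$ should recover the triangle mean from equation (4). Both sums then reduce to a single $k = 0$ term:

$$
\begin{align}
E(X_{(1)}) &= \frac{c-a}{b-a}\left[c - \frac{c-a}{3}\right] + \frac{c-b}{b-a}\left[-c + \frac{c-b}{3}\right] \\
&= \frac{(c-a)(2c+a) + (b-c)(2c+b)}{3(b-a)} = \frac{a+b+c}{3}
\end{align}
$$

which matches $E(X)$ for a single observation, as expected.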
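
The closed forms can also be spot-checked numerically. The sketch below is illustrative only and not part of the patch: `mean_order_stat_closed()` is a hypothetical helper that transcribes the final $E(X_{(r)})$ expression, assuming $a < c < b$ so that both sums contribute, and compares it with a Monte Carlo estimate built from `rtriangle()`.

```r
# Illustrative sketch: cross-check the closed-form E(X_(r)) derived above
# against a Monte Carlo estimate.  mean_order_stat_closed() is a hypothetical
# helper (not part of the package); it assumes a < c < b.
library(triangle)

mean_order_stat_closed <- function(n, r, a, b, c) {
  k1 <- 0:(n - r)
  s1 <- sum(choose(n - r, k1) * (b - a)^(k1 - n) * (c - a)^(n - k1) *
              (-1)^(n - r - k1) *
              (c / (n - k1) - (c - a) / ((n - k1) * (2 * n - 2 * k1 + 1))))
  k2 <- 0:(r - 1)
  s2 <- sum(choose(r - 1, k2) * (b - a)^(k2 - n) * (c - b)^(n - k2) *
              (-c / (n - k2) + (c - b) / ((n - k2) * (2 * n - 2 * k2 + 1))))
  r * choose(n, r) * (s1 + (-1)^(n - r) * s2)
}

# r-th order statistic of a sample of size n, estimated by simulation
set.seed(1)
n <- 5; r <- 4; a <- 0; b <- 1; c <- 0.5
sims <- replicate(20000, sort(rtriangle(n, a, b, c))[r])

mean_order_stat_closed(n, r, a, b, c)  # closed form
mean(sims)                             # Monte Carlo; should agree to ~2 decimals
```

The same comparison extends to $E(X_{(r)}^2)$ and hence $V(X_{(r)})$, which is the quantity the analytic `variance_rth_order_stat()` in this patch computes.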