From 5b782e9b8e098a3a3c8acf66162cce74611c88e5 Mon Sep 17 00:00:00 2001 From: melina-leite Date: Wed, 25 Sep 2024 10:21:44 +0200 Subject: [PATCH] createData issue #436 for temporalAutocorrelation --- Code/DHARMaIssues/436.R | 246 ++++++++++++++++++++++++++++++++++++++++ DHARMa/R/createData.R | 13 ++- 2 files changed, 256 insertions(+), 3 deletions(-) create mode 100644 Code/DHARMaIssues/436.R diff --git a/Code/DHARMaIssues/436.R b/Code/DHARMaIssues/436.R new file mode 100644 index 00000000..d31f1339 --- /dev/null +++ b/Code/DHARMaIssues/436.R @@ -0,0 +1,246 @@ +## Testing the createData with/without temporal autocorrelation +# Melina Leite +# 24 sept 2024 + + +# While doing unit testing for Mixed Models, I realized that if the model doesn't estiamte the correct variance of the group RE, this extra variability may end in the temporal autocorrelation becuase the way we create the column `time` in the dataset is sequential (which is also the same for creating groups), so the time is clustered in the groups. + +set.seed(123) +testData <- createData(sampleSize = 200, + overdispersion = 0, randomEffectVariance = 0.5, + family = gaussian()) +head(testData) + +fittedModel <- lme4::lmer(observedResponse ~ Environment1 + (1|group), + data = testData) +summary(fittedModel) +simulationOutput <- simulateResiduals(fittedModel, n=200) +testTemporalAutocorrelation(simulationOutput = simulationOutput, + time = testData$time) + + +set.seed(123) +testData <- createData(sampleSize = 200, temporalAutocorrelation = 1, + overdispersion = 0, randomEffectVariance = 0.5, + family = gaussian()) +head(testData) + +fittedModel <- lme4::lmer(observedResponse ~ Environment1 + (1|group), + data = testData) +summary(fittedModel) +simulationOutput <- simulateResiduals(fittedModel, n=200) +testTemporalAutocorrelation(simulationOutput = simulationOutput, + time = testData$time) + + + +createData2 <- function(sampleSize = 100, intercept = 0, fixedEffects = 1, + quadraticFixedEffects = NULL, numGroups = 10, + randomEffectVariance = 1, overdispersion = 0, + family = poisson(), scale = 1, cor = 0, + roundPoissonVariance = NULL, pZeroInflation = 0, + binomialTrials = 1, temporalAutocorrelation = 0, + spatialAutocorrelation = 0, factorResponse = FALSE, + replicates = 1, hasNA = FALSE){ + + nPredictors = length(fixedEffects) + + out = list() + + time = sample(1:sampleSize, sampleSize) # change here + x = runif(sampleSize) + y = runif(sampleSize) + + for (i in 1:replicates){ + + ######################################################################## + # Create predictors + + predictors = matrix(runif(nPredictors*sampleSize, min = -1), ncol = nPredictors) + + if (cor != 0){ + predTemp <- runif(sampleSize, min = -1) + predictors = (1-cor) * predictors + cor * matrix(rep(predTemp, nPredictors), ncol = nPredictors) + } + + colnames(predictors) = paste("Environment", 1:nPredictors, sep = "") + + ######################################################################## + # Create random effects + + group = rep(1:numGroups, each = sampleSize/numGroups) + groupRandom = rnorm(numGroups, sd = sqrt(randomEffectVariance)) + + ######################################################################## + # Creation of linear prediction + + linearResponse = intercept + predictors %*% fixedEffects + groupRandom[group] + + if(!is.null(quadraticFixedEffects)){ + linearResponse = linearResponse + predictors^2 %*% quadraticFixedEffects + } + + ######################################################################## + # Overdispersion on linear predictor + + + if(is.numeric(overdispersion)) linearResponse = linearResponse + rnorm(sampleSize, sd = overdispersion) + if(is.function(overdispersion)) linearResponse = linearResponse + overdispersion(linearResponse) + + + ######################################################################## + # Autocorrelation + + if(!(temporalAutocorrelation == 0)){ + t = 1:sampleSize # INCLUDING CODE HERE + distMat <- as.matrix(dist(t)) + + invDistMat <- 1/distMat * 5000 + diag(invDistMat) <- 0 + invDistMat = sfsmisc::posdefify(invDistMat) + + temporalError <- MASS::mvrnorm(n = 1, mu = rep(0,sampleSize), Sigma = invDistMat) + + linearResponse = linearResponse + temporalAutocorrelation * temporalError[time] # + } + + + if(!(spatialAutocorrelation == 0)) { + distMat <- as.matrix(dist(cbind(x, y))) + + invDistMat <- 1/distMat * 5000 + diag(invDistMat) <- 0 + invDistMat = sfsmisc::posdefify(invDistMat) + + spatialError <- MASS::mvrnorm(n = 1, mu = rep(0,sampleSize), Sigma = invDistMat) + + linearResponse = linearResponse + spatialAutocorrelation * spatialError + } + + + ######################################################################## + # Link and distribution + + linkResponse = family$linkinv(linearResponse) + + if (family$family == "gaussian") observedResponse = rnorm(n = sampleSize, mean = linkResponse, sd = scale) + # need checking else if (family$family == "gamma") observedResponse = rgamma(n = sampleSize, shape = linkResponse / scale, scale = scale) + else if (family$family == "binomial"){ + observedResponse = rbinom(n = sampleSize, binomialTrials, prob = linkResponse) + if (binomialTrials > 1) observedResponse = cbind(observedResponse1 = observedResponse, observedResponse0 = binomialTrials - observedResponse) + } + else if (family$family == "poisson") { + if(is.null(roundPoissonVariance)) observedResponse = rpois(n = sampleSize, lambda = linkResponse) + else observedResponse = round(rnorm(n = length(linkResponse), mean = linkResponse, sd = roundPoissonVariance)) + } + else if (grepl("Negative Binomial",family$family)) { + theta = as.numeric(gsub("[\\(\\)]", "", regmatches(family$family, gregexpr("\\(.*?\\)", family$family))[[1]])) + observedResponse = MASS::rnegbin(linkResponse, theta = theta) + } + else stop("wrong link argument supplied") + + ######################################################################## + # Zero-inflation + + if(pZeroInflation != 0){ + artificialZeros = rbinom(n = length(observedResponse), size = 1, prob = 1-pZeroInflation) + observedResponse = observedResponse * artificialZeros + } + + + if(factorResponse) observedResponse = factor(observedResponse) + + # add spatialError? + + out[[i]] <- data.frame(ID = 1:sampleSize, observedResponse, predictors, group = as.factor(group), time, x, y) + } + if(length(out) == 1) out = out[[1]] + + if(hasNA) out[1,3] = NA + + return(out) +} + +############## +set.seed(123) +testData2 <- createData2(sampleSize = 200, + overdispersion = 0, randomEffectVariance = 0.5, + family = gaussian()) +head(testData2) + +fittedModel <- lme4::lmer(observedResponse ~ Environment1 + (1|group), + data = testData2) +summary(fittedModel) +simulationOutput <- simulateResiduals(fittedModel, n=200) +testTemporalAutocorrelation(simulationOutput = simulationOutput, + time = testData2$time) + + +set.seed(123) +testData2 <- createData2(sampleSize = 100, temporalAutocorrelation = 2, + overdispersion = 0, randomEffectVariance = 0.5, + family = gaussian()) +head(testData2) + +fittedModel <- lme4::lmer(observedResponse ~ Environment1 + (1|group), + data = testData2) +summary(fittedModel) +simulationOutput <- simulateResiduals(fittedModel, n=200) +testTemporalAutocorrelation(simulationOutput = simulationOutput, + time = testData2$time) + + + +## copying test function: + +testTemporalAutocorrelation2 <- function(simulationOutput, time, alternative = c("two.sided", "greater", "less"), plot = TRUE){ + + simulationOutput = ensureDHARMa(simulationOutput, convert = T) + + # actually not sure if this is neccessary for dwtest, but seems better to aggregate + if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testTemporalAutocorrelation.") + + alternative <- match.arg(alternative) + + if(is.null(time)){ + time = sample.int(simulationOutput$nObs, simulationOutput$nObs) + message("DHARMa::testTemporalAutocorrelation - no time argument provided, using random times for each data point") + } + + # To avoid Issue #190 + if (length(time) != length(residuals(simulationOutput))) stop("Dimensions of time don't match the dimension of the residuals") + + out = lmtest::dwtest(simulationOutput$scaledResiduals ~ 1, order.by = time, alternative = alternative) + + if(plot == T) { + oldpar <- par(mfrow = c(1,2)) + on.exit(par(oldpar)) + + plot(simulationOutput$scaledResiduals[order(time)] ~ time[order(time)], + type = "l", ylab = "Scaled residuals", xlab = "Time", main = "Residuals vs. time", ylim = c(0,1)) + + abline(h=c(0.5)) + abline(h=c(0,0.25,0.75,1), lty = 2 ) + + acf(simulationOutput$scaledResiduals[order(time)], main = "Autocorrelation", ylim = c(-1,1)) + legend("topright", + c(paste(out$method, " p=", round(out$p.value, digits = 5)), + paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), + text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n") + + } + + return(out) +} + + + + + + + + + + + + diff --git a/DHARMa/R/createData.R b/DHARMa/R/createData.R index 6d841064..cad5029e 100644 --- a/DHARMa/R/createData.R +++ b/DHARMa/R/createData.R @@ -20,7 +20,14 @@ #' @param hasNA should an NA be added to the environmental predictor (for test purposes). #' @export #' @example /inst/examples/createDataHelp.R -createData <- function(sampleSize = 100, intercept = 0, fixedEffects = 1, quadraticFixedEffects = NULL, numGroups = 10, randomEffectVariance = 1, overdispersion = 0, family = poisson(), scale = 1, cor = 0, roundPoissonVariance = NULL, pZeroInflation = 0, binomialTrials = 1, temporalAutocorrelation = 0, spatialAutocorrelation =0, factorResponse = FALSE, replicates=1, hasNA = FALSE){ +createData <- function(sampleSize = 100, intercept = 0, fixedEffects = 1, + quadraticFixedEffects = NULL, numGroups = 10, + randomEffectVariance = 1, overdispersion = 0, + family = poisson(), scale = 1, cor = 0, + roundPoissonVariance = NULL, pZeroInflation = 0, + binomialTrials = 1, temporalAutocorrelation = 0, + spatialAutocorrelation = 0, factorResponse = FALSE, + replicates = 1, hasNA = FALSE){ nPredictors = length(fixedEffects) @@ -77,7 +84,7 @@ createData <- function(sampleSize = 100, intercept = 0, fixedEffects = 1, quadra diag(invDistMat) <- 0 invDistMat = sfsmisc::posdefify(invDistMat) - temporalError <- MASS::mvrnorm(n=1, mu=rep(0,sampleSize), Sigma=invDistMat) + temporalError <- MASS::mvrnorm(n = 1, mu = rep(0,sampleSize), Sigma = invDistMat) linearResponse = linearResponse + temporalAutocorrelation * temporalError } @@ -90,7 +97,7 @@ createData <- function(sampleSize = 100, intercept = 0, fixedEffects = 1, quadra diag(invDistMat) <- 0 invDistMat = sfsmisc::posdefify(invDistMat) - spatialError <- MASS::mvrnorm(n=1, mu=rep(0,sampleSize), Sigma=invDistMat) + spatialError <- MASS::mvrnorm(n = 1, mu = rep(0,sampleSize), Sigma = invDistMat) linearResponse = linearResponse + spatialAutocorrelation * spatialError }