diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 0000000..81b960f --- /dev/null +++ b/.Rprofile @@ -0,0 +1 @@ +source("renv/activate.R") diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..411408d --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata + +_extensions +renv +/.quarto/ diff --git a/LICENSE b/LICENSE index 0e259d4..2f244ac 100644 --- a/LICENSE +++ b/LICENSE @@ -1,121 +1,395 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. 
other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. 
Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. 
More_considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. 
Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. 
In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. 
Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 0000000..61dcab4 --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,5 @@ +project: + type: website + title: "submitted-202310-favrot-pest" + + diff --git a/data/MCMC_samples_of_predictions.rds b/data/MCMC_samples_of_predictions.rds new file mode 100644 index 0000000..6d3e7aa Binary files /dev/null and b/data/MCMC_samples_of_predictions.rds differ diff --git a/data/data_figure_1A.rds b/data/data_figure_1A.rds new file mode 100644 index 0000000..596a7f6 Binary files /dev/null and b/data/data_figure_1A.rds differ diff --git a/data/data_figure_1B.rds b/data/data_figure_1B.rds new file mode 100644 index 0000000..cd53378 Binary files /dev/null and b/data/data_figure_1B.rds differ diff --git a/data/real_data_extract.rds b/data/real_data_extract.rds new file mode 100644 index 0000000..fc56b57 Binary files /dev/null and b/data/real_data_extract.rds differ diff --git a/figures/fig1.png b/figures/fig1.png new file mode 100644 index 0000000..cf9d35d Binary files /dev/null and b/figures/fig1.png differ diff --git a/figures/sfds.png b/figures/sfds.png new file mode 100644 index 0000000..f31e3b2 Binary files /dev/null and b/figures/sfds.png differ diff --git a/functions/gelman.R b/functions/gelman.R new file mode 100644 index 0000000..cdc9281 --- /dev/null +++ b/functions/gelman.R @@ -0,0 +1,258 @@ + + + +# Functions of gelman.R, in the coda package. Ref : https://rdrr.io/cran/coda/src/R/gelman.R +# Functions to recode gelman.plot with ggplot2 (cf /functions/gelman.plot2.R) + + +"gelman.diag" <- function (x, confidence = 0.95, transform = FALSE, + autoburnin=TRUE, multivariate=TRUE) + ## Gelman and Rubin's diagnostic + ## Gelman, A. and Rubin, D (1992). Inference from iterative simulation + ## using multiple sequences. Statistical Science, 7, 457-551. + ## + ## Correction and Multivariate generalization: + ## Brooks, S.P. and Gelman, A. (1997) General methods for monitoring + ## convergence of iterative simulations. Journal of Computational and + ## Graphical Statistics, 7, 434-455. + +{ + x <- as.mcmc.list(x) + if (nchain(x) < 2) + stop("You need at least two chains") + ## RGA added an autoburnin parameter here, because if I have already + ## trimmed burn in, I don't want to do it again. 
+ if (autoburnin && start(x) < end(x)/2 ) + x <- window(x, start = end(x)/2 + 1) + Niter <- niter(x) + Nchain <- nchain(x) + Nvar <- nvar(x) + xnames <- varnames(x) + + if(transform) + x <- gelman.transform(x) + ## + ## Estimate mean within-chain variance (W) and between-chain variance + ## (B/Niter), and calculate sampling variances and covariance of the + ## estimates (varW, varB, covWB) + ## + ## Multivariate (upper case) + x <- lapply(x, as.matrix) + S2 <- array(sapply(x, var, simplify=TRUE), dim=c(Nvar,Nvar,Nchain)) + W <- apply(S2, c(1,2), mean) + xbar <- matrix(sapply(x, apply, 2, mean, simplify=TRUE), nrow=Nvar, + ncol=Nchain) + B <- Niter * var(t(xbar)) + + if(Nvar > 1 && multivariate) { + ## We want the maximal eigenvalue of the square matrix X that + ## solves WX = B. It is numerically easier to work with a + ## symmetric matrix that has the same eigenvalues as X. + if (is.R()) { + CW <- chol(W) + emax <- eigen(backsolve(CW, t(backsolve(CW, B, transpose=TRUE)), + transpose=TRUE), + symmetric=TRUE, only.values=TRUE)$values[1] + } + else { + emax <- eigen(qr.solve(W,B), symmetric=FALSE, only.values=TRUE)$values + } + mpsrf <- sqrt( (1 - 1/Niter) + (1 + 1/Nvar) * emax/Niter ) + } + else + mpsrf <- NULL + ## Univariate (lower case) + w <- diag(W) + b <- diag(B) + + + s2 <- matrix(apply(S2, 3, diag), nrow=Nvar, ncol=Nchain) + muhat <- apply(xbar,1,mean) + var.w <- apply(s2, 1, var)/Nchain + var.b <- (2 * b^2)/(Nchain - 1) + cov.wb <- (Niter/Nchain) * diag(var(t(s2), t(xbar^2)) - + 2 * muhat * var(t(s2), t(xbar))) + ## + ## Posterior interval combines all uncertainties in a t interval with + ## center muhat, scale sqrt(V), and df.V degrees of freedom. + ## + V <- (Niter - 1) * w / Niter + (1 + 1/Nchain) * b/ Niter + var.V <- ((Niter - 1)^2 * var.w + (1 + 1/Nchain)^2 * + var.b + 2 * (Niter - 1) * (1 + 1/Nchain) * cov.wb)/Niter^2 + df.V <- (2 * V^2)/var.V + ## + ## Potential scale reduction factor (that would be achieved by + ## continuing simulations forever) is estimated by + ## R = sqrt(V/W) * df.adj + ## where df.adj is a degrees of freedom adjustment for the width + ## of the t-interval. + ## + ## To calculate upper confidence interval we divide R2 = R^2 into two + ## parts, fixed and random. The upper limit of the random part is + ## calculated assuming that B/W has an F distribution. + ## + df.adj <- (df.V + 3)/(df.V + 1) + B.df <- Nchain - 1 + W.df <- (2 * w^2)/var.w + R2.fixed <- (Niter - 1)/Niter + R2.random <- (1 + 1/Nchain) * (1/Niter) * (b/w) + R2.estimate <- R2.fixed + R2.random + R2.upper <- R2.fixed + qf((1 + confidence)/2, B.df, W.df) * R2.random + psrf <- cbind(sqrt(df.adj * R2.estimate), sqrt(df.adj * R2.upper)) + dimnames(psrf) <- list(xnames, c("Point est.", "Upper C.I.")) + + out <- list(psrf = psrf, mpsrf=mpsrf) + class(out) <- "gelman.diag" + out +} + +"gelman.transform" <- function(x) + ## Gelman and Rubin diagnostic assumes a normal distribution. To + ## improve the normal approximation, variables on [0, Inf) are log + ## transformed, and variables on [0,1] are logit-transformed. +{ + if (!is.R()) { + # in S-PLUS this function generates a superfluous warning, + # so turn off all warnings during the function. 
+ oldWarn <- getOption("warn") + options(warn=-1) + on.exit(options (warn=oldWarn)) + } + if (nvar(x) == 1) { + z <- data.frame(lapply(x, unclass)) + if (min(z) > 0) { + y <- if(max(z) < 1) + log(z/(1-z)) + else log(z) + for (j in 1:nchain(x)) x[[j]] <- y[,j] + } + } + else for (i in 1:nvar(x)) { + z <- data.frame(lapply(x[, i], unclass)) + if (min(z) > 0) { + y <- if (max(z) < 1) + log(z/(1 - z)) + else log(z) + for (j in 1:nchain(x)) x[[j]][, i] <- y[, j] + } + } + return(x) +} + +"gelman.mv.diag" <- function (x, confidence = 0.95, transform = FALSE) +{ + s2 <- sapply(x, var, simplify=TRUE) + W <- matrix(apply(s2, 1, mean), nvar(x), nvar(x)) + xbar <- sapply(x, apply, 2, mean, simplify=TRUE) + B <- niter(x) * var(t(xbar)) + emax <- eigen(qr.solve(W,B), symmetric=FALSE, only.values=TRUE)$values[1] + mpsrf <- sqrt( (1 - 1/niter(x)) + (1 + 1/nvar(x)) * emax ) + return(mpsrf) +} + + +"print.gelman.diag" <- + function (x, digits = 3, ...) + { + cat("Potential scale reduction factors:\n\n") + print.default(x$psrf, digits = digits, ...) + if(!is.null(x$mpsrf)) { + cat("\nMultivariate psrf\n\n") + cat(format(x$mpsrf,digits = digits)) + } + cat("\n") + } + +"gelman.plot" <- + function (x, bin.width = 10, max.bins = 50, confidence = 0.95, + transform = FALSE, autoburnin = TRUE, auto.layout = TRUE, ask, + col = 1:2, lty = 1:2, xlab = "last iteration in chain", + ylab = "shrink factor", type = "l", ...) + { + if (missing(ask)) { + ask <- if (is.R()) { + dev.interactive() + } + else { + interactive() + } + } + x <- as.mcmc.list(x) + oldpar <- NULL + on.exit(par(oldpar)) + if (auto.layout) + oldpar <- par(mfrow = set.mfrow(Nchains = nchain(x), Nparms = nvar(x))) + y <- gelman.preplot(x, bin.width = bin.width, max.bins = max.bins, + confidence = confidence, transform = transform, + autoburnin = autoburnin) + all.na <- apply(is.na(y$shrink[, , 1, drop = FALSE]), 2, all) + if (!any(all.na)) + for (j in 1:nvar(x)) { + matplot(y$last.iter, y$shrink[, j, ], col = col, + lty = lty, xlab = xlab, ylab = ylab, type = type, + ...) 
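+                  ## reference line at a shrink factor of 1 (values close to 1 indicate convergence)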
+ abline(h = 1) + ymax <- max(c(1, y$shrink[, j, ]), na.rm = TRUE) + leg <- dimnames(y$shrink)[[3]] + xmax <- max(y$last.iter) + legend(xmax, ymax, legend = leg, lty = lty, bty = "n", + col = col, xjust = 1, yjust = 1) + title(main = varnames(x)[j]) + if (j==1) + oldpar <- c(oldpar, par(ask = ask)) + } + return(invisible(y)) + } + +"gelman.preplot" <- + function (x, bin.width = bin.width, max.bins = max.bins, + confidence = confidence, transform = transform, + autoburnin = autoburnin) + { + x <- as.mcmc.list(x) + nbin <- min(floor((niter(x) - 50)/thin(x)), max.bins) + if (nbin < 1) { + stop("Insufficient iterations to produce Gelman-Rubin plot") + } + binw <- floor((niter(x) - 50)/nbin) + last.iter <- c(seq(from = start(x) + 50 * thin(x), by = binw * + thin(x), length = nbin), end(x)) + shrink <- array(dim = c(nbin + 1, nvar(x), 2)) + dimnames(shrink) <- list(last.iter, varnames(x), + c("median", paste(50 * (confidence + 1), "%", + sep = "")) + ) + for (i in 1:(nbin + 1)) { + shrink[i, , ] <- gelman.diag(window(x, end = last.iter[i]), + confidence = confidence, + transform = transform, + autoburnin = autoburnin, + multivariate = FALSE)$psrf + } + all.na <- apply(is.na(shrink[, , 1, drop = FALSE]), 2, all) + if (any(all.na)) { + cat("\n******* Error: *******\n") + cat("Cannot compute Gelman & Rubin's diagnostic for any chain \n") + cat("segments for variables", varnames(x)[all.na], "\n") + cat("This indicates convergence failure\n") + } + return(list(shrink = shrink, last.iter = last.iter)) + } + +if (!is.R()){ + + qr.solve <- function (a, b, tol = 1e-07) { + if (!is.qr(a)) + a <- qr(a, tol = tol) + nc <- ncol(a$qr) + if (a$rank != nc) + stop("singular matrix 'a' in solve") + if (missing(b)) { + if (nc != nrow(a$qr)) + stop("only square matrices can be inverted") + b <- diag(1, nc) + } + return(qr.coef(a, b)) + } + +} \ No newline at end of file diff --git a/functions/gelman.plot2.R b/functions/gelman.plot2.R new file mode 100644 index 0000000..1d2f2eb --- /dev/null +++ b/functions/gelman.plot2.R @@ -0,0 +1,64 @@ + +source(file = "functions/gelman.R") + +# function adapted from gelman.plot, using ggplot instead of plot + +gelman.plot2 <- function (x, coef, bin.width = 10, max.bins = 50, confidence = 0.95, + transform = FALSE, autoburnin = TRUE, auto.layout = TRUE, + ask, col = 1:2, lty = 1:2, xlab = "last iteration in chain", + ylab = "shrink factor", type = "l", ncol = 4, ...){ + exclude = which((summary(x)$statistics %>% rownames) %in% c("gamma0[1]", "gamma1[1]")) + x = x[, - exclude] + if (missing(ask)) { + ask <- if (is.R()) { + dev.interactive() + } + else { + interactive() + } + } + x <- as.mcmc.list(x) + oldpar <- NULL + on.exit(par(oldpar)) + + y <- gelman.preplot(x, bin.width = bin.width, max.bins = max.bins, + confidence = confidence, transform = transform, autoburnin = autoburnin) + all.na <- apply(is.na(y$shrink[, , 1, drop = FALSE]), 2, + all) + l = list() + df_value = data.frame(median = NULL, bsup = NULL, Param = NULL, iter = NULL) + coef = rownames(summary(x)$statistics) + if (!any(all.na)){ + for (j in 1:nvar(x)) { + df_temp = as.data.frame(y$shrink[, j, ]); df_temp$Param = coef[j]; df_temp$iter = y$last.iter; colnames(df_temp)[c(1, 2)] = c("median", "97.5%") + df_value = rbind(df_value, df_temp) + } + } + + df_res = df_value %>% pivot_longer(cols = c(median, `97.5%`)) %>% as.data.frame %>% + mutate(Param = recode(Param, "alpha0" = "c100", "alpha1" = "c101", "gamma0[2]" = "c102", + "gamma0[3]" = "c104", "gamma0[4]" = "c105", "gamma1[2]" = "c106", + "gamma1[3]" = "c107", 
"gamma1[4]" = "c108", "sigma0" = "c109", "chi" = "c110", "eta" = "c111", + "Eff[2,1]" = "c112", "Eff[3,1]" = "c113", "Eff[4,1]" = "c114", + "Eff[2,2]" = "c115", "Eff[3,2]" = "c116", "Eff[4,2]" = "c117")) + + titre1 = TeX("$\\alpha_0$"); titre2 = TeX("$\\alpha_1$"); + + titre3 = TeX("$\\gamma_0 - Mavrik Jet$"); titre4 = TeX("$\\gamma_0 - Movento$"); titre5 = TeX("$\\gamma_0 - Teppeki$"); + titre6 = TeX("$\\gamma_1 - Mavrik Jet$"); titre7 = TeX("$\\gamma_1 - Movento$"); titre8 = TeX("$\\gamma_1 - Teppeki$"); + + titre9 = TeX("$\\sigma_0$"); titre10 = TeX("$\\chi$"); titre11 = TeX("$\\eta$"); + + titre12 = TeX("$Ef_6 - Mavrik Jet$"); titre13 = TeX("$Ef_6 - Movento$"); titre14 = TeX("$Ef_6 - Teppeki$") + titre15 = TeX("$Ef_{12} - Mavrik Jet$"); titre16 = TeX("$Ef_{12} - Movento$"); titre17 = TeX("$Ef_{12} - Teppeki$") + + df_res = df_res %>% mutate(Param = as.factor(Param)) + + levels(df_res$Param) = c(titre1, titre2, titre3, titre4, titre5, titre6, titre7, titre8, titre9, titre10, titre11, + titre12, titre13, titre14, titre15, titre16, titre17) + + return(ggplot(df_res) + geom_line(aes(x = iter, y = value, color = name, linetype = name)) + facet_wrap(~ Param, ncol = ncol, labeller = label_parsed) + + xlab("Last iteration in chain") + ylab("Shrink factor") + theme(legend.position = "bottom") + theme(legend.title = element_blank()) + + scale_linetype_manual(values = c("dotted", "solid")) + + scale_color_manual(values = c("red", "black"))) +} \ No newline at end of file diff --git a/functions/plot_chains.R b/functions/plot_chains.R new file mode 100644 index 0000000..32ba668 --- /dev/null +++ b/functions/plot_chains.R @@ -0,0 +1,35 @@ + +# function to plot the MCMC algorithm's chains obtained from the inference on real data + +plot_chains <- function(samp, nrow = 3){ + + n = dim(samp[[1]])[1] + p = dim(samp[[1]])[2] + + df_temp = rbind(samp[[1]] %>% as.data.frame %>% mutate(i = c(1 : n), chaine = "1"), + samp[[2]] %>% as.data.frame %>% mutate(i = c(1 : n), chaine = "2")) %>% select(- `gamma0[1]`, - `gamma1[1]`) + + df_plot = df_temp %>% pivot_longer(cols = c(1 : (p - 2))) %>% as.data.frame %>% + mutate(name = recode(name, "alpha0" = "c100", "alpha1" = "c101", "gamma0[2]" = "c102", + "gamma0[3]" = "c104", "gamma0[4]" = "c105", "gamma1[2]" = "c106", + "gamma1[3]" = "c107", "gamma1[4]" = "c108", "sigma0" = "c109", "chi" = "c110", "eta" = "c111", + "Eff[2,1]" = "c112", "Eff[3,1]" = "c113", "Eff[4,1]" = "c114", + "Eff[2,2]" = "c115", "Eff[3,2]" = "c116", "Eff[4,2]" = "c117")) + + titre1 = TeX("$\\alpha_0$"); titre2 = TeX("$\\alpha_1$"); + + titre3 = TeX("$\\gamma_0 - Mavrik Jet$"); titre4 = TeX("$\\gamma_0 - Movento$"); titre5 = TeX("$\\gamma_0 - Teppeki$"); + titre6 = TeX("$\\gamma_1 - Mavrik Jet$"); titre7 = TeX("$\\gamma_1 - Movento$"); titre8 = TeX("$\\gamma_1 - Teppeki$"); + + titre9 = TeX("$\\sigma_0$"); titre10 = TeX("$\\chi$"); titre11 = TeX("$\\eta$"); + + titre12 = TeX("$Ef_6 - Mavrik Jet$"); titre13 = TeX("$Ef_6 - Movento$"); titre14 = TeX("$Ef_6 - Teppeki$") + titre15 = TeX("$Ef_{12} - Mavrik Jet$"); titre16 = TeX("$Ef_{12} - Movento$"); titre17 = TeX("$Ef_{12} - Teppeki$") + + df_plot = df_plot %>% mutate(name = as.factor(name)) + + levels(df_plot$name) = c(titre1, titre2, titre3, titre4, titre5, titre6, titre7, titre8, titre9, titre10, titre11, + titre12, titre13, titre14, titre15, titre16, titre17) + + ggplot(df_plot) + geom_line(aes(x = i, y = value, color = chaine)) + facet_wrap(~ name, scales = "free_y", nrow = nrow, labeller = label_parsed) + theme(legend.position = "none") + 
xlab("Iteration") + ylab("Sampled value") +} \ No newline at end of file diff --git a/functions/simu_data.R b/functions/simu_data.R new file mode 100644 index 0000000..cebb26d --- /dev/null +++ b/functions/simu_data.R @@ -0,0 +1,53 @@ +simu_data <- function(seed, I){ + + set.seed(seed) + + ID = c(1 : I); + + data = expand_grid(ID = c(1 : I), Block = c(1 : K), Band = c(1 : J), + Beet = c(1 : N), DPT = seq(0, 12, length.out = T)) + + data = data %>% + mutate(Insecticide = ifelse(DPT <= 0, "T 1", paste("T", Band)), + tscaled = scale(data$DPT)) %>% + mutate(st = paste(ID, Insecticide), + sbit = paste(ID, Block, Insecticide, DPT)) + + Insecticide = data$Insecticide %>% unique %>% sort + names(gamma0) = Insecticide + names(gamma1) = Insecticide + + beta0 = rnorm(I, sd = sig0); + u = rnorm(I * J, sd = chi); + epsi = rnorm(I * J * K * (T - 1) + I * K, sd = eta); + + names(beta0) = data$ID %>% unique; + names(u) = data$st %>% unique + names(epsi) = data$sbit %>% unique + + data = data %>% mutate(alpha0 = alpha0, + alpha1 = alpha1, + N = N, + beta0 = recode(ID, !!!beta0), + gamma0 = recode(Insecticide, !!!gamma0), + gamma1 = recode(Insecticide, !!!gamma1), + u = recode(st, !!!u), + epsi = recode(sbit, !!!epsi)) + + data$lb = exp(data$alpha0 + data$beta0 + data$gamma0 + + (data$alpha1 + data$gamma1) * data$tscaled + + data$u + data$epsi) + + data$W = sapply(c(1 : (I * J * K * T * N)), + function(x) rpois(1, data$lb[x])) + + dataYZ = data %>% group_by(ID, Block, Band, DPT) %>% + summarise(Insecticide = unique(Insecticide), + tscaled = unique(tscaled), st = unique(st), N = unique(N), + Y = sum(W), Z = sum(W > 0)) %>% as.data.frame + + dataW = data %>% select(- alpha0, - alpha1, - beta0, - gamma0, - gamma1, + - u, - epsi, - lb) + + return(list("dataYZ" = dataYZ, "dataW" = dataW)) +} diff --git a/jags_models/modelW.txt b/jags_models/modelW.txt new file mode 100644 index 0000000..6b33031 --- /dev/null +++ b/jags_models/modelW.txt @@ -0,0 +1,49 @@ + +model { + + ###################### Likelyhood ###################### + for (i in 1:Q){ + W[i] ~ dpois(lb[i]) + log(lb[i]) = beta0[ID[i]] + gamma0[INSEC[i]] + + (alpha1 + gamma1[INSEC[i]]) * TIME[i] + u[ST[i]] + epsi[SBIT[i]] + } + + for (j in 1:K){ + beta0[j] ~ dnorm(alpha0, tau0) + } + + for (c in 1:M){ + u[c] ~ dnorm(0, invchi) + } + + for (a in 1:X){ + epsi[a] ~ dnorm(0, pi_eps) + } + + gamma0[1] = 0 + gamma1[1] = 0 + + ######################## Priors ######################## + for (s in 2:L){ + gamma0[s] ~ dnorm(0, 0.001) + gamma1[s] ~ dnorm(0, 0.001) + } + + alpha0 ~ dnorm(0, 0.001) + alpha1 ~ dnorm(0, 0.001) + sigma0 ~ dunif(0, 10) + chi ~ dunif(0, 10) + eta ~ dunif(0, 10) + + ################### Derived Quantities ################# + tau0 = pow(sigma0, -2) + invchi = pow(chi, -2) + pi_eps = pow(eta, -2) + + for (h in 2:L){ + for(t in 1 : T){ + Eff[h, t] = (1 - exp(gamma0[h] + gamma1[h] * TIME_unique[t])) * 100 + } + } +} + diff --git a/jags_models/modelYZ.txt b/jags_models/modelYZ.txt new file mode 100644 index 0000000..f8b2910 --- /dev/null +++ b/jags_models/modelYZ.txt @@ -0,0 +1,51 @@ + + model { + + # Likelihood ##################################################### + for (i in 1:Q){ + Y[i] ~ dpois(N[i] * lb[i]) + Z[i] ~ dbinom(pi[i], N[i]) + + log(lb[i]) = beta0[ID[i]] + gamma0[INSEC[i]] + (alpha1 + + gamma1[INSEC[i]]) * TIME[i] + u[ST[i]] + epsi[i] + + pi[i] = 1 - exp(- lb[i]) + epsi[i] ~ dnorm(0, pi_eps) + } + + for (j in 1:K){ + beta0[j] ~ dnorm(alpha0, tau0) + } + + for (c in 1:M){ + u[c] ~ dnorm(0, invchi) + } + + gamma0[1] = 0 + gamma1[1] = 0 
+
+  # Priors #########################################################
+  for (s in 2:L){
+    gamma0[s] ~ dnorm(0, 0.001)
+    gamma1[s] ~ dnorm(0, 0.001)
+  }
+
+  alpha0 ~ dnorm(0, 0.001)
+  alpha1 ~ dnorm(0, 0.001)
+  sigma0 ~ dunif(0, 10)
+  chi ~ dunif(0, 10)
+  eta ~ dunif(0, 10)
+
+  # Derived Quantities #############################################
+  tau0 = pow(sigma0, -2)
+  invchi = pow(chi, -2)
+  pi_eps = pow(eta, -2)
+
+  for (h in 2:L){
+    for(t in 1 : T){
+      Eff[h, t] = (1 - exp(gamma0[h] + gamma1[h] *
+                   TIME_unique[t])) * 100
+    }
+  }
+ }
+
diff --git a/logo_text_white.pdf b/logo_text_white.pdf new file mode 100755 index 0000000..f227eba Binary files /dev/null and b/logo_text_white.pdf differ diff --git a/published-202312-favrot-hierarchical-supp.qmd b/published-202312-favrot-hierarchical-supp.qmd new file mode 100644 index 0000000..c2516c2 --- /dev/null +++ b/published-202312-favrot-hierarchical-supp.qmd @@ -0,0 +1,43 @@
+---
+title: "Supplementary material"
+format: pdf
+---
+
+```{r, message = FALSE, echo = FALSE}
+library(ggplot2)
+```
+
+
+# Model fit and comparison to the negative binomial model
+
+To assess the model's fit to the data, we performed a posterior predictive check to verify that the data were compatible with the model assumptions. To do so, we computed the probability of exceeding each individual observation with the fitted model (2). Note that the number of pest individuals per plant is not available in practice; the data correspond to observed numbers of pest individuals for groups of $N_i$ plants. Based on this posterior predictive check, the computed probabilities all fell within the range 0.22-0.93 (except for the observations equal to 0, for which this probability was equal to 1) and were thus not extreme. This result indicates that the specified model is not incompatible with the observed data and that the over-dispersion was correctly taken into account.
+
+We also fitted a new model including a negative binomial distribution instead of a Poisson distribution. The results were almost identical between the two types of model, as shown in the figures below.
+
+```{r, echo = FALSE}
+MCMC_samples_of_predictions = readRDS(file = "data/MCMC_samples_of_predictions.rds")
+```
+
+
+```{r, echo = FALSE}
+#| fig-cap: "Posterior predictive check. The X-axis is on a logarithmic scale and represents the number of aphids increased by 1."
+ggplot(MCMC_samples_of_predictions) +
+  geom_point(aes(x = Y + 1, y = posterior_predictive_check)) +
+  scale_x_continuous(trans = "log2") +
+  ylab("Posterior probability") +
+  xlab("Observed number of aphids") +
+  facet_wrap(~ model)
+```
+
+
+```{r, echo = FALSE}
+#| fig-cap: "Observed vs predicted values"
+
+ggplot(MCMC_samples_of_predictions) +
+  geom_point(aes(x = Y, y = prediction)) +
+  xlab("Observed number of aphids") +
+  ylab("Predicted number of aphids") +
+  facet_wrap(~ model)
+```
+
diff --git a/published-202312-favrot-hierarchical.Rproj b/published-202312-favrot-hierarchical.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/published-202312-favrot-hierarchical.Rproj @@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/published-202312-favrot-hierarchical.qmd b/published-202312-favrot-hierarchical.qmd new file mode 100644 index 0000000..e7278ec --- /dev/null +++ b/published-202312-favrot-hierarchical.qmd @@ -0,0 +1,844 @@
+---
+title: "A hierarchical model to evaluate pest treatments from prevalence and intensity data"
+author:
+  - name: "Armand Favrot"
+    corresponding: true
+    email: armand.favrot@inrae.fr
+    url: https://fr.linkedin.com/in/armand-favrot-469014150
+    affiliations:
+      - name: MIA Paris-Saclay, INRAE AgroParisTech Université Paris-Saclay, France
+        url: https://mia-ps.inrae.fr/
+  - name: "David Makowski"
+    corresponding: true
+    email: david.makowski@inrae.fr
+    url: https://mia-ps.inrae.fr/david-makowski
+    affiliations:
+      - name: MIA Paris-Saclay, INRAE AgroParisTech Université Paris-Saclay, France
+        url: https://mia-ps.inrae.fr/
+date: last-modified
+description: |
+
+abstract: >+
+  In plant epidemiology, pest abundance is measured in field trials using metrics assessing either pest prevalence (fraction of the plant population infected) or pest intensity (average number of pest individuals present in infected plants). Some of these trials rely on prevalence, while others rely on intensity, depending on the protocols.
+
+  In this paper, we present a hierarchical Bayesian model able to handle both types of data. In this model, the intensity and prevalence variables are derived from a latent variable representing the number of pest individuals on each host individual, assumed to follow a Poisson distribution. Effects of pest treatments, time trend, and between-trial variability are described using fixed and random effects.
+
+  We apply the model to a real dataset in the context of aphid control in sugar beet fields. In this dataset, prevalence and intensity were derived from aphid counts observed either in factorial trials testing different types of pesticide treatments or in field surveys monitoring aphid abundance.
+
+  Next, we perform simulations to assess the impacts of using either prevalence or intensity data, or both types of data simultaneously, on the accuracy of the model parameter estimates and on the ranking of pesticide treatment efficacy.
+
+  Our results show that, when pest prevalence and pest intensity data are collected separately in different trials, the model parameters are more accurately estimated using both types of trials than using one type of trial only.
When prevalence data are collected in all trials and intensity data are collected in a subset of trials, estimations and pest treatment ranking are more accurate using both types of data than using prevalence data only. When only one type of observation can be collected in a pest survey or in an experimental trial, our analysis indicates that it is better to collect intensity data than prevalence data when all or most of the plants are expected to be infested, but that both types of data lead to similar results when the level of infestation is low to moderate. Finally, our simulations show that it is unlikely to obtain accurate results with fewer than 40 trials when assessing the efficacy of pest control treatments based on prevalence and intensity data.
+
+  Because of its flexibility, our model can be used to evaluate and rank the efficacy of pest treatments using either prevalence or intensity data, or both types of data simultaneously. As it can be easily implemented using standard Bayesian packages, we hope that it will be useful to agronomists, plant pathologists, and applied statisticians to analyze pest surveys and field experiments conducted to assess the efficacy of pest treatments.
+
+keywords: [bayesian model, epidemiology, hierarchical model, pest control, trial, survey]
+doi: "to complete"
+citation:
+  type: article-journal
+  container-title: "Computo"
+  issued: "2023/01/25"
+  url: https://computo.sfds.asso.fr/
+  issn: "2824-7795"
+google-scholar: true
+bibliography: references.bib
+github-user: computorg
+repo: "To complete"
+draft: false
+published: false
+format:
+  computo-pdf: default
+  computo-html: default
+header-includes:
+  - \usepackage{lineno}
+  - \usepackage{soul}
+---
+
+
+```{r dependencies, cache = FALSE, include = FALSE}
+library(ggplot2);
+library(tidyverse);
+library(gridExtra);
+library(rjags);
+library(latex2exp);
+library(grid);
+library(dplyr);
+library(tidyr);
+library(tibble)
+library(kableExtra)
+library(coda)
+
+x = 6; y = 8; z = 7
+theme_replace(axis.text = element_text(size = x), axis.title.x = element_text(size = y),
+              axis.title.y = element_text(size = y, angle = 90, margin = margin(r = 5)),
+              strip.text = element_text(size = z, face = "bold", color = "white"),
+              legend.text = element_text(size = y), legend.title = element_text(size = y, face = "bold"),
+              plot.title = element_text(size = z, hjust = 0.5, face = "bold"),
+              strip.background = element_rect(fill = "#4345a1"), panel.background = element_rect(fill = "#f3faff"), panel.grid.major = element_line(colour = "white"))
+options(ggplot2.discrete.fill = list("#67c5a7"))
+```
+
+\linenumbers
+
+
+
+# Introduction
+
+In plant epidemiology, pest and disease presence can be measured in a host population using different metrics. A first metric measures the presence/absence of the pest in the individuals (plants) of the host population. This metric is often called prevalence or incidence (@madden1999sampling, @shaw2018metrics). Prevalence describes the proportion of the host population in which the pest is present. This metric is relevant and widely used, but it does not account for the number of pest individuals per host individual. With prevalence, a plant infected by a single pest individual (e.g., an insect) and a plant infected by many pest individuals both represent one infected plant. For this reason, pest abundance is sometimes assessed using another metric representing the average number of pest individuals per host individual.
This metric is called intensity or severity (@madden1999sampling, @shaw2018metrics), and describes the intensity of the disease in the target population. These two metrics do not generally have the same requirement in terms of working time; measuring intensity indeed takes much more time than measuring prevalence because it is very tedious to count all pest individuals, especially when the individuals are small, numerous, and/or difficult to detect.
+
+Pest prevalence and intensity are commonly measured in factorial field trials to test the efficacy of different treatments. In this paper, we focus on an important application, namely the evaluation of pesticide treatments used as alternatives to neonicotinoids against aphids in sugar beet. Indeed, neonicotinoids had been a popular chemical treatment to control aphids for many years, especially in sugar beets, a major crop in Europe. Recently, neonicotinoids were recognized as presenting high risks for the environment, with a negative impact on a wide range of non-target organisms, including bees (@wood2017environmental, @pisa2015effects), and this family of pesticides has been banned in several European countries. In order to find a substitute for neonicotinoids, a number of factorial field trials were conducted to compare the efficacy of different alternative treatments over several years in different countries. Each trial consists of a set of plots divided into several blocks, themselves divided into several strips on which different pesticide treatments are randomly allocated. One strip always remains untreated to serve as a control. In each strip, aphid prevalence, aphid intensity or both metrics are measured in a sample of plants (usually 10-20 plants per strip). Depending on the protocol and on the working time constraint, either one type or both types of metrics are measured. Consequently, for a given pest treatment, some trials may report prevalence data while others report intensity data or both types of data.
+ This heterogeneity raises several issues. A first issue concerns the statistical analysis of the trials reporting prevalence and intensity. Although it is easy to fit a generalized linear model to each type of data separately, it is less straightforward to fit a single model to the whole set of trials in order to obtain a single ranking of the pest treatments taking into account both types of data at the same time. Generally, factorial trials assessing treatment efficacy are analyzed with statistical models that take into account one of the two metrics but not both. Prevalence data are thus commonly analyzed using binomial generalized linear models and intensity data are frequently analyzed with Poisson generalized linear models (@michel2017framework, @LAURENT2023106140, @agresti2015foundations). As far as we know, no statistical model has been proposed to assess treatment efficacy based on the simultaneous analysis of prevalence and intensity data. In @osiewalski2019joint, the authors introduced a switching model designed to handle two count variables, one of which may be degenerate. This model was employed to characterize the counts of cash payments and bank card payments in Poland, utilizing data from both cardholders and non-cardholders. A generalized form of the bivariate negative binomial regression model was developed in @gurmu2000generalized, allowing for a more flexible representation of the correlation between the dependent variables.
This model was applied to describe the number of visits to a doctor and the number of visits to non-doctor health professionals. It outperformed existing bivariate models across various model comparison criteria. In order to analyze data related to crash counts categorized by severity, @park2007multivariate employed a multivariate Poisson-lognormal model, effectively addressing both overdispersion and a fully generalized correlation structure within the dataset. However, it should be noted that these models did not include any binomial distribution and thus could not be used to deal with proportion data, such as pest prevalence.
+ Another issue concerns the practical value of combining both prevalence and intensity data. It is unclear whether the simultaneous analysis of prevalence and intensity data may increase the accuracy of the estimated treatment efficacy compared to the use of a single type of data, and whether this may increase the probability of identifying the most effective treatments. Finally, it is also unclear how future trials should be designed, in particular how many trials are required to obtain accurate estimations, and whether intensity data should be preferred to prevalence data.
+
+ In this paper, we propose a new flexible statistical model that can be used to rank pest treatments from trials including prevalence data, intensity data, or both. We apply it to a real dataset including trials testing the efficacy of pesticides against aphids infesting sugar beets, considering contrasted scenarios of data availability, and we show how the proposed model can be used to evaluate the efficacy of different treatments. Based on simulations, we then quantify the reduction of mean absolute errors in the estimated treatment efficacies resulting from the use of both prevalence and intensity data during the statistical inference, compared to the use of either prevalence or intensity data. The rest of the paper is organized as follows. First, we present the structure of the dataset including real prevalence and intensity data. Next, we describe in detail the proposed model, the inference method, and the simulation strategy. After checking the convergence of the fitting algorithm, we show how the model can be used to assess treatment efficacy. We finally present the results based on simulated data and we make several recommendations.
+
+# Material and Method
+
+## Description of the data
+
+Data are collected in 32 field trials conducted in France, Belgium and the Netherlands to compare several treatments against aphids in sugar beets. Each trial consists of a plot located at a given site in a given year (site-year), divided into one to four blocks. Each of these blocks is itself divided into strips where different treatments are tested, one of these treatments being an untreated control and the others corresponding to different types of insecticide. In each strip of each block, the number of aphids is counted on a sample of 10 beet plants (intensity). The number of infested plants (prevalence) is measured as well, but only in 15 trials out of 32. The total numbers of intensity and prevalence data are equal to 1128 and 561, respectively. Note that the number of aphids is not counted on each beet plant but in the whole plant sample. Intensity and prevalence are monitored at different times after treatments. As shown in @fig-one A, the dataset is unbalanced as fewer data are available for the treatment Mavrik Jet compared to the others.
Figure 1B shows that the intensity and prevalence tend to increase with time. + + + +```{r figure1, cache = FALSE, fig.width=12, fig.height=14} +#| echo: false +#| label: fig-one +#| fig-cap: "Description of the dataset. **A** Number of observations according to the type of insecticide. **B** Examples of observed number of aphids averaged over the blocks (intensity) and number of infested beets out of ten (prevalence) averaged over the blocks, at different dates for two trials." + +data_figure_1A = readRDS(file = "data/data_figure_1A.rds") +data_figure_1B = readRDS(file = "data/data_figure_1B.rds") + +rectangle_fig1A <- grobTree(rectGrob(gp = gpar(fill = "#e9e9e9")), textGrob("A", x = 0.5, hjust = 0.5, gp = gpar(cex = 2.5, fontface = "bold"))) + +fig1A = ggplot(data_figure_1A) + + geom_bar(aes(x = Insecticide, y = n, fill = Insecticide), stat = "Identity") + + xlab("Insecticide") + + ylab("Number of observations") + + scale_fill_manual(values = c('#df626c', '#893f3d', '#ff842e', '#188038')) + + scale_x_discrete(limits = c("Untreated", "Mavrik Jet", "Movento", "Teppeki")) + + theme(legend.position = "none", + axis.title.x = element_text(margin = margin(b = 50, t = 20), size = 18), + axis.title.y = element_text(size = 18, margin = margin(r = 20)), + axis.text = element_text(size = 16)) + + +rectangle_fig1B <- grobTree(rectGrob(gp = gpar(fill = "#e9e9e9")), textGrob("B", x = 0.5, hjust = 0.5, gp = gpar(cex = 2.5, fontface = "bold"))) + +fig1B_aphid_intensity = ggplot(data_figure_1B) + + geom_point(aes(x = DPT, y = Ymean, col = Insecticide), size = 2.5) + + geom_line(aes(x = DPT, y = Ymean, col = Insecticide), size = 0.2) + + scale_color_manual(values = c('#893f3d', '#ff842e', '#188038')) + + facet_wrap(~ ID, scales = "free", ncol = 2) + + theme(legend.position = "bottom") + + xlab("") + + ylab("Number of aphids") + + theme(plot.margin = margin(t = 40, r = 40, b = 0, l = 10), + axis.title.x = element_text(size = 18), + axis.title.y = element_text(size = 18, margin = margin(r = 20)), + axis.text = element_text(size = 16), + strip.text = element_text(size = 20), + legend.title = element_text(size = 18), + legend.text = element_text(size = 18)) + +fig1B_aphid_prevalence = ggplot(data_figure_1B) + geom_point(aes(x = DPT, y = Zmean, col = Insecticide), size = 2.5) + + geom_line(aes(x = DPT, y = Zmean, col = Insecticide), size = 0.2) + + scale_color_manual(values = c('#893f3d', '#ff842e', '#188038')) + + facet_wrap(~ ID, ncol = 2, scales = "free_x") + theme(legend.position = "none") + + xlab("Days post treatment") + + ylab("Number of infested beets") + + theme(panel.spacing = unit(1, "cm")) + + theme(plot.margin = margin(t = 0, r = 40, b = 0, l = 10), + axis.title.y = element_text(size = 18, margin = margin(r = 20)), + axis.text = element_text(size = 16), + axis.title.x = element_text(margin = margin(b = 20, t = 20), size = 18), + strip.text = element_text(size = 20)) + + +legend <- cowplot::get_legend(fig1B_aphid_intensity) + +do.call("grid.arrange", + c(list(rectangle_fig1A, + fig1A + theme(plot.margin = margin(t = 30, r = 20)), + rectangle_fig1B, + fig1B_aphid_intensity + theme(legend.position = "none"), + fig1B_aphid_prevalence), list(legend), list(ncol = 1, + layout_matrix = rbind(c(1), c(2), c(3), c(4), c(5), c(6)), heights = c(0.12, 1.2, 0.12, 1, 1, 0.12)))) +``` + + + + +## Model +### Specification + +We introduce an unobserved variable representing the number of pest individuals (here, aphids) on each plant in a sample of $N$ plants (here, sugar beets). 
This variable is denoted $W$ and is assumed to follow a Poisson distribution whose mean is a function of time.
+
+We use the following indices: $i$ for the trial, $j$ for the treatment, $k$ for the block, $t$ for the time and $s$ for the plant number. The distribution of $W_{ijkts}$ is defined as:
+
+$$
+ W_{ijkts}\sim\mathcal{P}(\lambda_{ijkt})
+$$ {#eq-model_W}
+
+$$
+ \log\ \lambda_{ijkt} = \alpha_0 + \beta_{0i} + \gamma_{0j} + (\alpha_1 + \gamma_{1j})\ X_t + u_{ij} + \epsilon_{ijkt}
+$$ {#eq-model_lambda}
+
+with
+
+- $\beta_{0i}\sim\mathcal{N}(0, \sigma_0^2)$
+- $u_{ij}\sim\mathcal{N}(0, \chi^2)$
+- $\epsilon_{ijkt}\sim\mathcal{N}(0,\eta^2)$
+
+The random variables are all assumed to be independent. The parameters $\alpha_0$, $\alpha_1$, $\gamma_{0j}$, and $\gamma_{1j}$ are treated as fixed effects. This model serves as a tool for conducting inference on a population of trials, from which the subset of trials comprising our dataset is assumed to constitute a random sample. In essence, the trials contained in our database are used to estimate the parameter values that characterize a target population, where the tested pest control treatments will actually be implemented. Consequently, all parameters that depend on individual trials are defined as random effects.
+
+The observed variables (intensity and prevalence) can be expressed as functions of $W$. We denote by:
+
+- $Y_{ijkt}$ the number of pest individuals (aphids) in the sample of $N_i$ plants collected in trial $i$, treatment $j$, block $k$, at time $t$
+- $Z_{ijkt}$ the number of infested plants among the $N_i$ plants collected in trial $i$, treatment $j$, block $k$, at time $t$
+
+Then, assuming the $W$s are independent, we have:
+
+$$
+ Y_{ijkt} = \sum\limits_{s = 1}^{N_i} W_{ijkts}\ \hspace{1cm}\ Y_{ijkt} \sim \mathcal{P}(N_{i}\lambda_{ijkt})
+$$ {#eq-model_Y}
+
+$$
+ Z_{ijkt} = \sum\limits_{s = 1}^{N_i} \boldsymbol{1}_{W_{ijkts}>0}\ \hspace{1cm} \ Z_{ijkt} \sim \mathcal{B}(N_{i},\ \pi_{ijkt})
+$$ {#eq-model_Z}
+
+where $\pi_{ijkt} = 1-\text{exp}(-\lambda_{ijkt})$. The different quantities used in the model are defined in Table 1.
+
+| Notation | Description |
+|---|---|
+| $i$ | trial index |
+| $j$ | treatment index |
+| $k$ | block index |
+| $t$ | time index |
+| $s$ | plant index |
+| $N_i$ | sample size (number of plants) |
+| $\lambda_{ijkt}$ | mean number of pest individuals (aphids) on one plant |
+| $\pi_{ijkt}$ | probability for a plant to be infested |
+| $\alpha_0$ | log of the mean number of pest individuals (aphids) per plant in the untreated group |
+| $\beta_{0i}$ | trial effect |
+| $\gamma_{0j}$ | effect of treatment $j$ at time 0 (time of treatment) |
+| $\alpha_1$ | growth parameter of the number of pest individuals in the untreated group |
+| $\gamma_{1j}$ | effect of treatment $j$ on the time effect (interaction between treatment $j$ and time) |
+| $X_t$ | number of days post treatment |
+| $u_{ij}$ | random interaction between trial and treatment |
+| $\epsilon_{ijkt}$ | residual term |
+
+: Description of the indices, inputs and parameters used in the model {tbl-colwidths="[15,85]"}
+
+From this model we define the efficacy of the $j$th treatment at time $t$ ($t$ days after pesticide application) by the quantity (@LAURENT2023106140):
+
+$$
+ \text{Ef}_{jt} = \Big(1 - \text{exp}\big(\gamma_{0j}+\gamma_{1j} \times X_{t}\big)\Big) \times 100
+$$ {#eq-Efficacy}
+
+The quantity $\text{Ef}_{jt}$ corresponds to the expected percentage reduction of pest individuals (aphid numbers) for the $j$th treatment compared to the untreated control, averaged over trials and blocks.
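+
+To make @eq-Efficacy concrete, the minimal sketch below computes $\text{Ef}_{jt}$ for a vector of post-treatment times. The function name `efficacy` and the numerical values are illustrative placeholders, not estimates from our dataset.
+
+```{r efficacy-sketch, cache = FALSE}
+#| echo: true
+#| eval: false
+
+# Expected percentage reduction of aphid numbers for treatment j,
+# X_t days after application (@eq-Efficacy).
+efficacy <- function(gamma0_j, gamma1_j, X_t) {
+  (1 - exp(gamma0_j + gamma1_j * X_t)) * 100
+}
+
+# Placeholder parameter values, for illustration only
+efficacy(gamma0_j = -1.2, gamma1_j = -0.15, X_t = c(0, 6, 12))
+```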
+
+Our Poisson log-linear model includes an additive random dispersion term associated with each individual observation ($\epsilon_{ijkt}$ in @eq-model_lambda). This is a standard and well-recognized approach to dealing with over-dispersion (@harrison2014using). To check the model assumptions, we perform a posterior predictive check; posterior predictive checks are frequently used to look for systematic discrepancies between real and simulated data (@gelman1995bayesian). To do so, we compute, with the fitted model (@eq-model_lambda), the probability of exceeding each individual observation. The computed probabilities all fall in the range 0.22-0.93 (except for the observations equal to 0, for which this probability is equal to 1) and are thus not extreme. This result indicates that the specified model is not incompatible with the observed data and that the over-dispersion is correctly taken into account. In addition, we fit another model with a negative binomial distribution instead of a Poisson distribution; the results are almost identical between the two models.
+
+### Inference on real data
+
+The model parameters are estimated using Bayesian inference with a Markov chain Monte Carlo (MCMC) method. We perform the inference in R with the package rjags (@rjags). For each of the four datasets listed in Table 2, we fit the model (@eq-model_lambda - @eq-model_Z) with the following weakly informative priors: $\mathcal{N}(0, 10^3)$ for the parameters $\alpha_0,\ \gamma_0,\ \alpha_1,\ \gamma_1$ and $\mathcal{U}([0, 10])$ for the parameters $\sigma_0, \chi, \eta$. We use two Markov chains with $2 \times 10^5$ iterations (after an adaptation phase of $10^5$ iterations), and we center the time variable $t$ to facilitate convergence.
+
+The convergence of the MCMC algorithm is checked by inspecting the mixing of the two Markov chains and by monitoring the Gelman-Rubin diagnostic statistic (@gelman1992inference). We then compute the posterior mean of the pesticide treatment efficacy (defined by @eq-Efficacy) as well as its 95% credibility interval. The code used to fit the model is provided below.
+
+
+::: {.callout-note}
+The following code presents the inference on an extract of the real dataset, which includes the two trials shown in @fig-one B (2020 - B1A97; 2020 - u1CwE). It is a demo for the "50% Y - 50% Z" scenario, and the numbers of adaptation and sampling iterations are set here to 2000 in order to reduce computation time.
+:::
+
+
+```{r inference_example_real_data, cache = FALSE, results = 'hide'}
+#| echo: true
+#| eval: true
+#| file: scripts/inference_example_real_data.R
+```
+
+\
+In practice, it is common that only $Y$ or $Z$ data are available in some of the trials. In this case, the resulting dataset includes observations of $Y$ in some trials and observations of $Z$ in others. The dataset may even include only one type of observation, either $Y$ or $Z$, in all trials. Here, we define four scenarios with contrasted levels of $Y$ and $Z$ availability in order to evaluate the consequences of using different types of datasets. We consider four data subsets defined from the real dataset, including trials with observations of $Y$, with observations of $Z$, or with both types of observation in different proportions (Table 2). The data subset "100% Y - 0% Z" includes the Y data collected in the 32 trials. The data subset "50% Y - 0% Z" includes the Y data collected in the 17 trials for which no Z observation is available.
The data subset "0% Y - 50% Z" includes the Z data collected in the 15 trials for which Z observations are available. The data subset "50% Y - 50% Z" includes the $Y$ data collected in 17 trials and the $Z$ data collected in the other 15 trials; this last subset does not include any trial reporting both $Y$ and $Z$ data. Throughout our analysis, missing data are assumed to be missing at random.
+
+The hierarchical model defined above is fitted to each dataset in turn. Each fitted model is then used to compute the posterior mean and the 95% credibility interval of $Ef_{jt}$ for each treatment at $t$ = 6 and 12 days after pesticide application.
+
+| Type of dataset | Description |
+|---------|:-----|
+| 100% Y - 0% Z | Y observations available in the 32 trials and no Z |
+| 50% Y - 0% Z | Y observations available in 17 trials and no Z |
+| 0% Y - 50% Z | Z observations available in 15 trials and no Y |
+| 50% Y - 50% Z | Y observations available in 17 trials and Z observations available in the other 15 trials |
+
+: Four data subsets defined from the original dataset (real data). {tbl-colwidths="[35,65]"}
+
+
+### Simulations
+
+Simulations are carried out to further investigate the impact of the type and amount of available data on the accuracy of the parameter estimates and on the ability of the model to identify the most and least effective treatments.
+
+We consider three numbers of trials, equal to 20, 40 and 80, successively. For the data simulations, the model parameters are set equal to those estimated from the real dataset "100% Y - 0% Z" defined in Table 2 (posterior means). For each number of trials, we generate virtual data from the model (@eq-model_W - @eq-model_Z) and estimate the model parameters according to the following procedure (a condensed sketch of one replicate is given after this list):
+
+- Draw values of $\beta_{0i},\ u_{ij}$ and $\epsilon_{ijkt}$ from their distributions for each trial, 3 treatments (plus the untreated control), 3 dates ($t$ = 0, 6, 12), and 4 blocks,
+- Calculate $\lambda_{ijkt}$ from @eq-model_lambda,
+- Draw values of $W_{ijkts}$ from its Poisson distribution for 10 plants ($s$ = 1, ..., 10),
+- Calculate $Y_{ijkt}$ and $Z_{ijkt}$ from the $W$s for each trial, treatment, date and block,
+- Generate the eight data subsets corresponding to the scenarios defined in Table 3 (all values of $W$, the generated values of $Y$ only, the generated values of $Z$ only, both $Y$ and $Z$ values but not $W$, the values of $Y$ in 50% of the trials, the values of $Z$ in 50% of the trials, the values of $Y$ in 50% of the trials and the values of $Z$ in the other 50%, and the values of $Y$ in 50% of the trials together with the values of $Z$ in all trials),
+- Fit the model (@eq-model_W - @eq-model_Z) to each of the data subsets using the MCMC procedure described above.
+
+At the end of this procedure, we get eight sets of estimated parameters, corresponding to the eight scenarios defined in Table 3.
+This procedure is repeated 1000 times, each time with a different seed between 0 and 999. However, the computations performed with JAGS failed for 26 replicates, and 974 replicates were thus available for the analysis.
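+
+The following sketch illustrates one such simulation replicate in condensed form. It is not the `simu_data.R` function used to produce the reported results (included further below), and all object names and parameter values are arbitrary placeholders rather than the posterior means estimated from the real dataset.
+
+```{r simulation-replicate-sketch, cache = FALSE}
+#| echo: true
+#| eval: false
+
+# One simulated replicate of the model (@eq-model_W - @eq-model_Z).
+# Parameter values below are placeholders, for illustration only.
+set.seed(0)
+I <- 20; J <- 4; K <- 4; Xt <- c(0, 6, 12); N <- 10
+alpha0 <- 1; alpha1 <- 0.1
+gamma0 <- c(0, -0.1, -1.1, -1.2)   # j = 1 is the untreated control
+gamma1 <- c(0, 0.2, -0.1, -0.15)
+sigma0 <- 1.9; chi <- 0.3; eta <- 0.9
+
+beta0 <- rnorm(I, 0, sigma0)                 # trial effects
+u     <- matrix(rnorm(I * J, 0, chi), I, J)  # trial x treatment interactions
+
+sim <- expand.grid(i = 1:I, j = 1:J, k = 1:K, t = seq_along(Xt))
+sim$lambda <- exp(alpha0 + beta0[sim$i] + gamma0[sim$j] +
+                  (alpha1 + gamma1[sim$j]) * Xt[sim$t] +
+                  u[cbind(sim$i, sim$j)] + rnorm(nrow(sim), 0, eta))
+
+# W: aphid counts on each of the N plants of a strip; Y and Z follow from the Ws
+W <- matrix(rpois(nrow(sim) * N, rep(sim$lambda, each = N)),
+            nrow = nrow(sim), byrow = TRUE)
+sim$Y <- rowSums(W)        # intensity: total number of aphids in the sample
+sim$Z <- rowSums(W > 0)    # prevalence: number of infested plants
+```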
+
+
+For each number of trials and each scenario defined in Table 3, the accuracy of the estimated parameters $\gamma$ (on which the treatment efficacies depend) is evaluated by computing a relative absolute error, averaged over the three treatments ($j$ = 1 corresponding to the control), as:
+
+
+$$
+ E_{\gamma} = \frac{1}{2 \times 3}\sum\limits_{j = 2}^{4} \Big( \frac{|\gamma_{0j} - \hat{\gamma}_{0j}|}{|\gamma_{0j}|} + \frac{|\gamma_{1j} - \hat{\gamma}_{1j}|}{|\gamma_{1j}|}\Big)
+$$ {#eq-E_gamma}
+
+where the true parameter values are set equal to the posterior means computed with the real dataset, and the parameter estimates ($\hat{\gamma}_{0j}$ and $\hat{\gamma}_{1j}$) are the posterior means computed from the simulated data for the $j$th treatment. For each trial number and each scenario defined in Table 3, the 974 values of $E_{\gamma}$ obtained for the 974 generated data subsets are then averaged. The average values obtained for the eight scenarios are finally compared to determine the type of data leading to the most accurate parameter estimates.
+
+In addition, we compare the eight scenarios according to another criterion measuring the difference between the estimated efficacy values and the true efficacy values (averaged over the three pesticide treatments considered), as follows:
+
+$$
+ E_{\text{Ef}_{t}}\ = \frac{1}{3} \sum\limits_{j = 2}^4 \frac{|\text{Ef}_{jt} - \hat{\text{Ef}}_{jt}|} {|\text{Ef}_{jt}|}
+$$ {#eq-E_efficacy}
+
+where the true treatment efficacy is defined by @eq-Efficacy (setting the parameters $\gamma$ to the posterior means obtained with the real dataset) and the estimated efficacy ($\hat{\text{Ef}}_{jt}$) is the posterior mean computed with the simulated dataset. The 974 values of $E_{\text{Ef}_{t}}$ obtained from the 974 simulated datasets are then averaged for each trial number and each scenario. Finally, we evaluate the proportions of cases where the true best treatment (i.e., the treatment with the highest efficacy) is correctly identified.
+
+In order to determine whether the difference in performance between the data types $Y$ and $Z$ depends on pest abundance, we perform an additional series of simulations with three values of the model parameter $\alpha_0$, equal to $-1,\ 1$ and $2$, successively. These three values of $\alpha_0$ define three contrasted levels of pest abundance (the higher $\alpha_0$, the higher the abundance). We use the procedure outlined above, considering only two scenarios of data availability, namely "100% Y - 0% Z" and "0% Y - 100% Z". This procedure is implemented with each value of $\alpha_0$ in turn. The results are used to compare the model performances obtained using either $Y$ or $Z$ for parameter estimation, depending on the pest abundance specified by $\alpha_0$.
+
+|Type of dataset| Description|
+|---|---|
+|100% W | W observations available in all the trials|
+|100% Y - 0% Z | Y observations available in all the trials|
+|0% Y - 100% Z | Z observations available in all the trials|
+|100% Y - 100% Z | Y and Z observations available in all the trials|
+|50% Y - 0% Z | Y observations available in half of the trials|
+|0% Y - 50% Z | Z observations available in half of the trials|
+|50% Y - 50% Z | Y observations available in half of the trials and Z observations available in the other half of the trials|
+|50% Y - 100% Z | Y observations available in half of the trials and Z observations available in all the trials|
+
+: Eight scenarios compared using simulated data. {tbl-colwidths="[35,65]"}
+
+These simulations required a significant amount of computation time and were conducted on the INRAE MIGALE server. For a single seed and 20 trials, the computations took 80 minutes on a computer with an Intel Core i7 processor running at 1.90 GHz and 32 GB of RAM. The code used to perform them is presented below.
+
+```{r simulation-function, cache = FALSE}
+#| echo: true
+#| eval: true
+#| file: functions/simu_data.R
+```
+
+\
+
+::: {.callout-note}
+The following code presents the inference for one seed. As the computing time grows quickly with the number of trials and the number of sampled values, we set here the number of trials to 10 (I = 10) and the numbers of adaptation and sampling iterations to 2000.
+
+Our simulation results are obtained by running this code for I = 10, 20, 40, 80, for all seeds between 0 and 999, with a number of adaptations and iterations equal to 36000.
+
+Scenarios "50% Y - 0% Z" and "0% Y - 50% Z" are obtained from scenarios "100% Y - 0% Z" and "0% Y - 100% Z", respectively. For example, the "50% Y - 0% Z" scenario with 40 trials corresponds to the "100% Y - 0% Z" scenario with 20 trials.
+:::
+
+
+
+
+```{r simulations, results = 'hide', cache = FALSE}
+#| echo: true
+#| eval: true
+#| file: scripts/simulations.R
+```
+
+# Results
+## Results obtained with real data
+
+```{r load_res_real_data, cache = FALSE, echo = FALSE}
+resY = readRDS(file = "results/Real_data/res.Y.nadapt.150000.niter.3e+05.rds")
+resYZ = readRDS(file = "results/Real_data/res.YhalfZhalf.nadapt.150000.niter.3e+05.rds")
+resYhalf = readRDS(file = "results/Real_data/res.Yhalf.nadapt.150000.niter.3e+05.rds")
+resZhalf = readRDS(file = "results/Real_data/res.Zhalf.nadapt.150000.niter.3e+05.rds")
+```
+
+We present here the results obtained with the real data. First, we check the convergence of the model for the scenarios defined in Table 2. We then compare the estimated values and the credibility intervals of the treatment efficacy obtained in the different scenarios.
+
+### Model convergence and posterior distributions
+
+@fig-two presents the Markov chains associated with the model parameters and treatment efficacies for the different scenarios. The x-axis presents the iteration number and the y-axis presents the sampled value. Results show that the chains are well mixed. @fig-three presents the Gelman-Rubin statistics associated with the model parameters and treatment efficacies as a function of the iterations, for the different scenarios. We observe that this statistic converges to 1, which indicates the convergence of the algorithm.
+
+Table 4 gives a summary of the posterior distributions of the model parameters and treatment efficacies obtained for the "100% Y - 0% Z" scenario. The significantly positive value of $\alpha_1$ indicates that the aphid numbers tend to increase with time in untreated plots. The relatively high value of $\sigma_0$ (posterior mean equal to 1.87) reveals a strong variability in aphid numbers between trials. The posterior mean value of $\chi$ (0.27) suggests that the treatment efficacy varies across trials. The $\gamma_0$ parameter is negative for all three treatments, indicating a negative effect of the treatments on the aphid numbers at the time of pesticide spray. The Movento and Teppeki treatments have a similar effect, with posterior means for $\gamma_0$ equal to -1.13 and -1.24 and standard deviations equal to 0.12 and 0.11, respectively.
The effect of the Mavrik Jet treatment is weaker, as its posterior mean for $\gamma_0$ is equal to -0.13 with a standard deviation equal to 0.16. The $\gamma_1$ posterior means are negative for Movento and Teppeki (-0.14 and -0.15), suggesting that the effect of these treatments tends to increase with time, whereas the posterior mean is positive for Mavrik Jet (= 0.24), suggesting that the effect of this treatment may decrease with time. However, the 95% credibility intervals of $\gamma_1$ include zero and these parameters are not very accurately estimated.
+
+
+
+```{r figure2, cache = FALSE, out.width="100%", fig.height = 7, echo = FALSE}
+#| label: fig-two
+#| fig-cap: "Model convergence - Markov chains for the model parameters and treatment efficacies, in the scenarios \"50% Y - 0% Z\" (**A**), \"50% Y - 50% Z\" (**B**), \"100% Y - 0% Z\" (**C**) and \"0% Y - 50% Z\" (**D**). The x-axis presents the iteration number and the y-axis presents the sampled value."
+
+source(file = "functions/plot_chains.R")
+
+options(warn = - 1)
+col_background = "#e9e9e9"; col_text = "black"
+cex = 0.85
+nrow = 3
+nbreaks = 3
+strip_size = 5
+
+fig2A = suppressMessages(plot_chains(resYhalf, nrow = nrow) + ylab("Sampled value") + scale_x_continuous(breaks = c(0, 1.5e4, 3e4))) + xlab("") +
+  scale_y_continuous(n.breaks = nbreaks) + theme(strip.text = element_text(size = strip_size))
+my_g2A <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("A", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold")))
+
+fig2B = suppressMessages(plot_chains(resYZ, nrow = nrow) + ylab("Sampled value") + scale_x_continuous(breaks = c(0, 1.5e4, 3e4))) + xlab("") +
+  scale_y_continuous(n.breaks = nbreaks) + theme(strip.text = element_text(size = strip_size))
+my_g2B <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("B", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold")))
+
+fig2C = suppressMessages(plot_chains(resY, nrow = nrow) + ylab("Sampled value") + scale_x_continuous(breaks = c(0, 1.5e4, 3e4))) + xlab("") +
+  scale_y_continuous(n.breaks = nbreaks) + theme(strip.text = element_text(size = strip_size))
+my_g2C <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("C", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold")))
+
+fig2D = suppressMessages(plot_chains(resZhalf, nrow = nrow) + ylab("Sampled value") + scale_x_continuous(breaks = c(0, 1.5e4, 3e4))) +
+  scale_y_continuous(n.breaks = nbreaks) + theme(strip.text = element_text(size = strip_size), axis.title.x = element_text(margin = margin(t = 5)))
+my_g2D <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("D", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold")))
+
+grid.arrange(my_g2A, fig2A, my_g2B, fig2B, my_g2C, fig2C, my_g2D, fig2D, heights = c(1,9,1,9,1,9,1,10))
+```
+
+\
+
+```{r figure3, cache = FALSE, out.width="100%", fig.height = 7, echo = FALSE}
+#| label: fig-three
+#| fig-cap: "Model convergence - Gelman-Rubin statistics for the parameters of the model (@eq-model_W - @eq-model_Z) and for treatment efficacies (@eq-Efficacy), according to the scenarios \"50% Y - 0% Z\" (**A**), \"50% Y - 50% Z\" (**B**), \"100% Y - 0% Z\" (**C**) and \"0% Y - 50% Z\" (**D**). The x-axis presents the iteration number and the y-axis presents the Gelman-Rubin statistic."
+ +source(file = "functions/gelman.plot2.R") + +margey = 5 +margex = 10 +ncol = 9 +strip_size = 5 +breaks = c(1, 3, 5); limits = c(1, 5) + +fig3A = gelman.plot2(resYhalf, ncol = ncol) + + scale_x_continuous(breaks = c(2e5, 4e5)) + xlab("") + + scale_y_continuous(limits = limits, breaks = breaks) + + theme(axis.title.y = element_text(margin = margin(l = - margey, r = margey)), legend.position = "none", strip.text = element_text(size = strip_size)) +my_g3A <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("A", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold"))) + +fig3B = gelman.plot2(resYZ, ncol = ncol) + + scale_x_continuous(breaks = c(2e5, 4e5)) + xlab("") + + scale_y_continuous(limits = limits, breaks = breaks) + + theme(axis.title.y = element_text(margin = margin(l = - margey, r = margey)), legend.position = "none", strip.text = element_text(size = strip_size)) +my_g3B <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("B", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold"))) + +fig3C = gelman.plot2(resY, ncol = ncol) + + scale_x_continuous(breaks = c(2e5, 4e5)) + xlab("") + + scale_y_continuous(limits = limits, breaks = breaks) + + theme(axis.title.y = element_text(margin = margin(l = - margey, r = margey)), legend.position = "none", strip.text = element_text(size = strip_size)) +my_g3C <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("C", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold"))) + +fig3D = gelman.plot2(resZhalf, ncol = ncol) + + scale_x_continuous(breaks = c(2e5, 4e5)) + xlab("") + + scale_y_continuous(limits = limits, breaks = breaks) + + theme(axis.title.y = element_text(margin = margin(l = - margey, r = margey)), strip.text = element_text(size = strip_size), plot.margin = margin(b = - 5, t = 6, r = 6, l = 6)) +my_g3D <- grobTree(rectGrob(gp = gpar(fill = col_background)), textGrob("D", x = 0.5, hjust = 0.5, gp = gpar(col = col_text, cex = cex, fontface = "bold"))) + +x_axis <- grobTree(textGrob("Last iteration in chain", x = 0.5, hjust = 0.5, gp = gpar(cex = 0.7))) +legend = cowplot::get_legend(fig3D) + +grid.arrange(my_g3A, fig3A, my_g3B, fig3B, my_g3C, fig3C, my_g3D, fig3D + theme(legend.position = "none"), x_axis, legend, heights = c(1,9,1,9,1,9,1,9, 1, 2)) +``` +\ + + +```{r table4, cache = FALSE, echo = FALSE} + +tab4 = cbind(summary(resY)$statistics, summary(resY)$quantiles) %>% + as.data.frame %>% + rownames_to_column(var = "Parameter") %>% select(- c(4, 5, c(7 : 9))) %>% + mutate(Parameter = recode(Parameter, "gamma0[1]" = "gamma0 - untreated", "gamma0[2]" = "gamma0 - Mavrik Jet", + "gamma0[3]" = "gamma0 - Movento", "gamma0[4]" = "gamma0 - Teppeki", + "gamma1[1]" = "gamma1 - untreated", "gamma1[2]" = "gamma1 - Mavrik Jet", + "gamma1[3]" = "gamma1 - Movento", "gamma1[4]" = "gamma1 - Teppeki" )) %>% + filter(!grepl("Eff", Parameter)) %>% + + mutate(Parameter = recode(Parameter, "alpha0" = "\u03B1\u2080", + "alpha1" = "\u03B1\u2081", + "chi" = "\u03C7", + "eta" = "\u03B7", + "sigma0" = "\u03C3\u2080")) %>% + mutate(Parameter = gsub(x = Parameter, pattern = "gamma0", replacement = "\u03B3\u2080")) %>% + mutate(Parameter = gsub(x = Parameter, pattern = "gamma1", replacement = "\u03B3\u2081")) %>% + mutate_if(is.numeric, round, 2) + +# re-ordering the parameters +tab4 = tab4[c(c(1 : 2), 13, c(3 : 12)), ] + +rownames(tab4) = NULL + +tab4 %>% + kbl(booktabs = TRUE, caption = "Summary of the posterior distributions obtained with the \"100\\% Y - 
0\\% Z\" scenario for the model parameters and treatment efficacies: posterior mean, standard deviation, 2.5 and 97.5 quantiles.", linesep = "") %>% + kable_styling() +``` + + +### Estimated values of pesticide treatment efficacies + +@fig-four presents the posterior means and the 95% credibility intervals of treatment efficacies at 6 days (A) and 12 days (B) after pesticide spray, for the "100% Y - 0% Z", "50% Y - 50% Z", "50% Y - 0% Z" and "0% Y - 50% Z" scenarios. Different scenarios are indicated by different colors. The x-axis presents the efficacy and the y-axis presents the treatments. Overall, the results obtained are consistent across scenarios; Teppeki and Movento show higher mean efficacies than Mavrik Jet, and the credibility intervals are narrower for Teppeki and Movento than for Mavrik Jet in all scenarios. The credibility interval of the "100% Y - 0% Z" scenario is narrower than that of the "50% Y - 50% Z" scenario, which is itself narrower than that of the "50% Y - 0% Z" and "0% Y - 50% Z" scenarios. Overall, the credibility interval sizes obtained with the "100% Y - 0% Z" scenario are 25% to 45% smaller than those obtained with the "50% Y - 0% Z" scenario (Table 5), aligning with the principle that increased data availability leads to more precise estimates. Results also indicate that credibility intervals are frequently larger with "0% Y - 50% Z" than with "50% Y - 0% Z," suggesting that more accurate estimates are achievable using Y compared to Z, at least in this specific case study. Interestingly, the sizes of the credibility intervals are approximately 25% smaller with "50% Y - 50% Z" compared to "50% Y - 0% Z," demonstrating that the combination of Y and Z observations collected from distinct trials proves beneficial and results in a reduction of uncertainty in the estimated treatment efficacy. This finding underscores the potential enhancement of treatment efficacy estimation through the combination of trials incorporating prevalence data and those incorporating intensity data. + +```{r figure4, cache = FALSE, echo=FALSE, out.width = "100%", fig.height = 2.6} +#| label: fig-four +#| fig-cap: "Estimated treatment efficacies after 6 days (**A**) and after 12 days (**B**), with their credibility intervals. Colors correspond to the different scenarios." 
+ +####################################################################################################################################################### + +y1 = 0.075; y2 = 0.225 + +# PR à 6 jours + +df_resY = cbind(summary(resY)$statistics[1 : 3, ], summary(resY)$quantiles[1 : 3, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,1]" = "Mavrik Jet", "Eff[3,1]" = "Movento", "Eff[4,1]" = "Teppeki")) + +df_resYZ = cbind(summary(resYZ)$statistics[1 : 3, ], summary(resYZ)$quantiles[1 : 3, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,1]" = "Mavrik Jet", "Eff[3,1]" = "Movento", "Eff[4,1]" = "Teppeki")) + +df_resYhalf = cbind(summary(resYhalf)$statistics[1 : 3, ], summary(resYhalf)$quantiles[1 : 3, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,1]" = "Mavrik Jet", "Eff[3,1]" = "Movento", "Eff[4,1]" = "Teppeki")) + +df_resZhalf = cbind(summary(resZhalf)$statistics[1 : 3, ], summary(resZhalf)$quantiles[1 : 3, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,1]" = "Mavrik Jet", "Eff[3,1]" = "Movento", "Eff[4,1]" = "Teppeki")) + +Order = "binf" + +df_IC = rbind(df_resYhalf %>% mutate(Data = "50% Y - 0% Z"), + df_resYZ %>% mutate(Data = "50% Y - 50% Z"), + df_resY %>% mutate(Data = "100% Y - 0% Z"), + df_resZhalf %>% mutate(Data = "0% Y - 50% Z")) + +prod_rescale = df_resY %>% group_by(Insecticide) %>% summarise(P = ifelse(Order == "moyenne", max(P), min(value))) %>% + as.data.frame %>% arrange(P) %>% select(Insecticide) %>% as.matrix %>% as.vector + +x = c(1 : length(prod_rescale)); names(x) = prod_rescale + +df_IC_6 = df_IC %>% mutate(Ordonnee = recode(Insecticide, !!!x)) %>% + mutate(Ordonnee = ifelse(Data == "100% Y - 0% Z", Ordonnee + y2, + ifelse(Data == "50% Y - 50% Z", Ordonnee + y1, + ifelse(Data == "50% Y - 0% Z", Ordonnee - y1, Ordonnee - y2))), + jour = "6") + +# calcul du pourcentage de réduction de l'IC par rapport au scenario 50% Y - 0% Z +df_len = df_IC %>% select(Insecticide, name, value, Data) %>% pivot_wider(names_from = c(name)) %>% as.data.frame %>% + mutate(IC_length = b_sup - b_inf) %>% arrange(Insecticide) + +df_remp = df_len %>% filter(Data == "50% Y - 0% Z") %>% select(Insecticide, IC_length) +x = df_remp$IC_length; names(x) = df_remp$Insecticide + +df_len = df_len %>% mutate(length50pcY = recode(Insecticide, !!!x)) %>% + mutate(`For efficacy at 6 days` = (1 - (IC_length / length50pcY)) * (- 100)); + +df_len6 = df_len %>% rename(Insecticide = Insecticide) %>% select(Insecticide, Data, `For efficacy at 6 days`) %>% + filter(Data != "50% Y - 0% Z") %>% arrange(Data) %>% mutate_if(is.numeric, round, digits = 1) + 
+####################################################################################################################################################### +# PR à 12 jours +df_resY = cbind(summary(resY)$statistics[4 : 6, ], summary(resY)$quantiles[4 : 6, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,2]" = "Mavrik Jet", "Eff[3,2]" = "Movento", "Eff[4,2]" = "Teppeki")) + +df_resYZ = cbind(summary(resYZ)$statistics[4 : 6, ], summary(resYZ)$quantiles[4 : 6, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,2]" = "Mavrik Jet", "Eff[3,2]" = "Movento", "Eff[4,2]" = "Teppeki")) + +df_resYhalf = cbind(summary(resYhalf)$statistics[4 : 6, ], summary(resYhalf)$quantiles[4 : 6, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,2]" = "Mavrik Jet", "Eff[3,2]" = "Movento", "Eff[4,2]" = "Teppeki")) + + +df_resZhalf = cbind(summary(resZhalf)$statistics[4 : 6, ], summary(resZhalf)$quantiles[4 : 6, ]) %>% + as.data.frame %>% select(Mean, `2.5%`, `97.5%`) %>% rownames_to_column("Insecticide") %>% + pivot_longer(cols = c(3, 4)) %>% rename(P = Mean) %>% + mutate(name = recode(name, `2.5%` = "b_inf", `97.5%` = "b_sup")) %>% + mutate(Insecticide = recode(Insecticide, "Eff[2,2]" = "Mavrik Jet", "Eff[3,2]" = "Movento", "Eff[4,2]" = "Teppeki")) + +Order = "binf" + +df_IC = rbind(df_resYhalf %>% mutate(Data = "50% Y - 0% Z"), + df_resYZ %>% mutate(Data = "50% Y - 50% Z"), + df_resY %>% mutate(Data = "100% Y - 0% Z"), + df_resZhalf %>% mutate(Data = "0% Y - 50% Z")) + +prod_rescale = df_resY %>% group_by(Insecticide) %>% summarise(P = ifelse(Order == "moyenne", max(P), min(value))) %>% + as.data.frame %>% arrange(P) %>% select(Insecticide) %>% as.matrix %>% as.vector + +x = c(1 : length(prod_rescale)); names(x) = prod_rescale + +df_IC_12 = df_IC %>% mutate(Ordonnee = recode(Insecticide, !!!x)) %>% + mutate(Ordonnee = ifelse(Data == "100% Y - 0% Z", Ordonnee + y2, + ifelse(Data == "50% Y - 50% Z", Ordonnee + y1, + ifelse(Data == "50% Y - 0% Z", Ordonnee - y1, Ordonnee - y2))), + jour = "12") + +# calcul du pourcentage de réduction de l'IC par rapport au scenario 50% Y - 0% Z +df_len = df_IC %>% select(Insecticide, name, value, Data) %>% pivot_wider(names_from = c(name)) %>% as.data.frame %>% + mutate(IC_length = b_sup - b_inf) %>% arrange(Insecticide) + +df_remp = df_len %>% filter(Data == "50% Y - 0% Z") %>% select(Insecticide, IC_length) +x = df_remp$IC_length; names(x) = df_remp$Insecticide + +df_len = df_len %>% mutate(length50pcY = recode(Insecticide, !!!x)) %>% + mutate(`For efficacy at 12 days` = (1 - (IC_length / length50pcY)) * (- 100)); + +df_len12 = df_len %>% rename(Insecticide = Insecticide) %>% select(Insecticide, Data, `For efficacy at 12 days`) %>% + filter(Data != "50% Y - 0% Z") %>% arrange(Data) %>% mutate_if(is.numeric, round, digits = 1) + 
+#######################################################################################################################################################
+df_IC = rbind(df_IC_6, df_IC_12) %>% mutate(jour = recode(jour, "6" = "A", "12" = "B"));
+
+col = c("#9F248FFF", "#017A4AFF", "#F9791EFF", "#244579FF") # c(paletteer_d("awtools::spalette", n = 5))[- c(2)]
+
+ggplot(df_IC) + geom_point(aes(x = P, y = Ordonnee, color = Data), size = 1) +
+  geom_line(aes(x = value, y = Ordonnee, group = Ordonnee, color = Data), size = 0.4) +
+  scale_color_manual(values = col, limits = c("100% Y - 0% Z", "50% Y - 50% Z", "50% Y - 0% Z", "0% Y - 50% Z")) +
+  scale_y_discrete(limits = prod_rescale) +
+  theme(legend.position = "bottom", legend.key.width = unit(0.5, 'cm'), legend.key.size = unit(0.4, "cm")) + ylab("") +
+  xlab("Treatment efficacy") + geom_vline(xintercept = 0, col = "#990033", size = 0.5, linetype = "dashed") +
+  theme(axis.title.x = element_text(margin = margin(b = - 7, t = 5))) +
+  labs(color = "Scenario") +
+  facet_wrap(~ jour, ncol = 2, scales = "free_x")
+```
+
+
+
+```{r table5, cache = FALSE, echo = FALSE}
+
+cbind(df_len6, df_len12 %>% select(- Data, - Insecticide)) %>%
+  kbl(booktabs = TRUE, caption = "Differences in the sizes of the 95\\% credibility intervals (CI) of the estimated treatment efficacies for the scenarios \"50\\% Y - 50\\% Z\", \"0\\% Y - 50\\% Z\" and \"100\\% Y - 0\\% Z\", compared to \"50\\% Y - 0\\% Z\". The difference is given as a percentage. A positive (negative) value indicates an increase (decrease) of the credibility interval size. The third column indicates differences for the efficacy at 6 days after pesticide spray, and the fourth column indicates the difference for the efficacy at 12 days.", linesep = "") %>%
+  kable_styling()
+```
+
+
+
+
+
+## Results obtained by simulation
+
+```{r load_res_simulations, cache = FALSE, echo = FALSE}
+df_col_simu = readRDS(file = "results/Simulations/df_col_simu.rds")
+res_simu_sigma_1.87 = readRDS(file = "results/Simulations/res_simu_sigma_1.87.rds")
+
+res_simu = res_simu_sigma_1.87
+
+res_simu = res_simu %>% mutate(Type = recode(Type, "Y" = "100% Y - 0% Z", "Z" = "0% Y - 100% Z", "Zdemi" = "0% Y - 50% Z",
+                                             "Ydemi" = "50% Y - 0% Z", "YdemiZdemi" = "50% Y - 50% Z", "ZYdemi" = "50% Y - 100% Z"),
+                               Erreur = recode(name, "MEgamma" = "A", "MEEf6" = "B", "MEEf12" = "C", "MEbest6" = "A", "MEbest12" = "B"),
+                               value = ifelse(grepl("best", name), value * 100, value))
+
+df_col = df_col_simu
+
+types = res_simu$Type %>% unique
+x = df_col %>% filter(type %in% types) %>% select(col) %>% as.matrix %>% as.vector; names(x) = types
+
+res_simu = res_simu %>% mutate(Col = recode(Type, !!!x))
+```
+
+### Interest of combining trials with prevalence and trials with intensity
+
+In this section, we consider the situation where only one type of observation is available per trial: pest prevalence or pest intensity. We compare the accuracy of the estimated parameters and of the estimated levels of treatment efficacy obtained by combining both types of trials with the accuracy obtained using each set of trials separately.
+
+The parameters used to generate the data are given in Table 6.
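+
+Before turning to the results, the minimal sketch below recalls how the accuracy criterion $E_{\gamma}$ (@eq-E_gamma) is computed. The "true" values correspond to the posterior means reported above for the real dataset, while the `*_hat` estimates and the function name `E_gamma` are illustrative placeholders.
+
+```{r error-criterion-sketch, cache = FALSE}
+#| echo: true
+#| eval: false
+
+# Relative absolute error on the gamma parameters, averaged over the
+# three treatments (j = 2, 3, 4), as in @eq-E_gamma.
+E_gamma <- function(gamma0, gamma1, gamma0_hat, gamma1_hat) {
+  j <- 2:4
+  mean(c(abs(gamma0[j] - gamma0_hat[j]) / abs(gamma0[j]),
+         abs(gamma1[j] - gamma1_hat[j]) / abs(gamma1[j])))
+}
+
+# True values: posterior means reported for the real dataset (j = 1 is the control);
+# the *_hat values are arbitrary placeholders standing in for estimates
+# obtained from one simulated dataset.
+E_gamma(gamma0     = c(0, -0.13, -1.13, -1.24),
+        gamma1     = c(0,  0.24, -0.14, -0.15),
+        gamma0_hat = c(0, -0.20, -1.00, -1.30),
+        gamma1_hat = c(0,  0.20, -0.12, -0.17))
+```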
+ + +```{r table6, cache = FALSE, echo = FALSE} + +tab6 = data.frame(Parameters = c( + + # alpha_0 et alpha_1 + c("\u03B1\u2080", "\u03B1\u2081"), + + # gamma_0 + c("\u03B3\u2080\u2080", "\u03B3\u2080\u2081", "\u03B3\u2080\u2082", "\u03B3\u2080\u2083"), + + # gamma_1 + c("\u03B3\u2081\u2080", "\u03B3\u2081\u2081", "\u03B3\u2081\u2082", "\u03B3\u2081\u2083"), + + # sigma_0, eta et chi + c("\u03C3\u2080", "\u03B7", "\u03C7") + ), + Values = c(alpha0, alpha1, gamma0, gamma1, sig0, eta, chi)) + + +tab6 %>% + t %>% + kbl(booktabs = TRUE, caption = "Parameters used to generate virtual data.") %>% + kable_styling() +``` + + + +@fig-five represents the $E_{\gamma}$ (@eq-E_gamma) (**A**), $E_{Ef_6}$ (@eq-E_efficacy) (**B**) and $E_{Ef_{12}}$ (@eq-E_efficacy) (**C**) evaluation criteria for the "0% Y - 50% Z", "50% Y - 0% Z" and "50% Y - 50% Z" scenarios (different scenarios are indicated by different colors). The x-axis presents the number of trials and the y-axis the value of the criterion, averaged over the simulated data sets. For each number of trials and for each criterion, we observe that scenario "50% Y - 50% Z" gives a more accurate estimate than scenario "50% Y - 0% Z" which itself gives a more accurate estimate than scenario "0% Y - 50% Z". For example, for the efficacy at 6 days with 40 trials, the mean absolute error of scenario "50% Y - 0% Z" is 10% less than the mean absolute error of scenario "0% Y - 50% Z" (0.38 vs 0.42). The mean absolute error of scenario "50% Y - 50% Z" is 35% less than that of scenario "0% Y - 50% Z" (0.26 vs 0.42). The values of the three criteria decrease with the number of trials. The $E_{\gamma}$ criterion decreases from 0.62 with 20 trials to 0.32 with 80 trials for the "50% Y - 50% Z" scenario. A value of 20 trials is therefore not sufficient to obtain an accurate estimate of the parameters. + + +```{r figure5, cache = FALSE, out.width="100%", fig.height = 2.5, echo = FALSE} +#| label: fig-five +#| fig-cap: "Values of the $E_{\\gamma}$ (@eq-E_gamma) (**A**), $E_{Ef_6}$ (@eq-E_efficacy) (**B**) and $E_{Ef_{12}}$ (@eq-E_efficacy) (**C**) mean absolute error criteria for the \"0% Y - 50% Z\", \"50% Y - 0% Z\" and \"50% Y - 50% Z\" scenarios. The x-axis presents the number of trials and the y-axis presents the mean absolute error, averaged over the 974 simulated datasets. Different colors correspond to different scenarios." + +types = c("50% Y - 0% Z", "50% Y - 50% Z", "0% Y - 50% Z") +res_temp = res_simu %>% filter(Type %in% types) +col_temp = df_col %>% arrange(type) %>% filter(type %in% types) %>% select(col) %>% as.matrix %>% as.vector + +ggplot(res_temp %>% filter(!grepl("best", name))) + geom_point(aes(x = I, y = value, color = Type), size = 1) + + geom_line(aes(x = I, y = value, color = Type), size = 0.2) + xlab("Number of trials") + + facet_wrap(~ Erreur, nrow = 1, scales = "free") + theme(legend.position = "bottom") + + ylab("Mean absolute error") + theme(legend.title = element_blank(), axis.title.x = element_text(margin = margin(t = 5, b = -8))) + + scale_color_manual(values = col_temp) +``` + +@fig-six presents the percentages of cases where the best treatment at 6 days (A) and 12 days (B) has been correctly identified for the "50% Y - 0% Z", "0% Y - 50% Z" and "50% Y - 50% Z" scenarios. The x-axis presents the number of trials and the y-axis the percentage of cases where the treatment identification is correct. In general, the best treatment is better identified when the number of trials increases. 
With the "50% Y - 50% Z" scenario, the best treatment at 6 days is well identified in 69% of cases with 20 trials and in 85% of cases with 80 trials. For each number of trials, the percentage of correctly identification is higher for the "50% Y - 50% Z" scenario than for the other two, and the scenario "50% Y - 0% Z" generally gives better results than the scenario "0% Y - 50% Z", except at 6 days with 20 trials. For example, at 12 days after treatment and with 40 trials, the percentage of correct identification is 5% higher with scenario "50% Y - 50% Z" than with scenario "50% Y - 0% Z" (78 vs 73), and 4% higher with the scenario "50% Y - 0% Z" than with scenario "0% Y - 50% Z" (73 vs 69). These results show the interest of combining prevalence and intensity data for assessing the efficacy of treatments and identifying the best treatments. + + +```{r figure6, cache = FALSE, out.width="100%", fig.height = 3, echo = FALSE} +#| label: fig-six +#| fig-cap: "Comparison of proportion cases where the best treatment is correctly identified in the \"0% Y - 50% Z\", \"50% Y - 0% Z\" and \"50% Y - 50% Z\" scenarios. The x-axis represents the number of trials and the y-axis represents the percentage of cases where the best treatment has been correctly identified at 6 days (A) and 12 days (B), over the 974 simulated datasets. Different colors correspond to different scenarios." + +ggplot(res_temp %>% filter(grepl("best", name))) + geom_point(aes(x = I, y = value, color = Type), size = 1) + + geom_line(aes(x = I, y = value, color = Type), size = 0.2) + xlab("Number of trials") + + facet_wrap(~ Erreur, nrow = 1, scales = "free") + theme(legend.position = "bottom") + + ylab("Best treatment identification percentage") + theme(legend.title = element_blank(), axis.title.x = element_text(margin = margin(t = 5, b = - 8))) + + scale_color_manual(values = col_temp) +``` + +### Interest of adding intensity when prevalence is measured in all trials + +We now consider a situation where prevalence is measured in each trial and intensity is measured in only some of these trials. We compare the results obtained when the data are combined and when they are used separately. As the prevalence data are usually more accessible in practice and the intensity data more costly, it is important to evaluate the interest of adding intensity data in the statistical analysis. + + The parameters used to generate the data are the same as in 3.2.1. + + @fig-seven presents the evaluation criteria $E_{\gamma}$ (Fig 7A), $E_{Ef_6}$ (Fig 7B) and $E_{Ef_{12}}$ (Fig. 7C) for the scenarios "0% Y - 100% Z", "50% Y - 0% Z", "50% Y - 100% Z" and "50% Y - 50% Z". The x-axis presents the number of trials and the y-axis presents the value of the criterion, averaged over the number of simulated data sets. Results show that the mean absolute errors are lower in scenarios "50% Y - 100% Z" and "50% Y - 50% Z" than in "0% Y - 100% Z", and that the mean absolute errors are lower in the "0% Y - 100% Z" scenario than in the "50% Y - 0% Z" scenario. For example, considering the treatment efficacy at 12 days with 40 trials (Fig. 7C), the mean absolute errors are 13% lower in scenarios "50% Y - 100% Z" and "50% Y - 50% Z" than in "0% Y - 100% Z" (0.30 vs 0.34), and the mean absolute error is 11% lower in "0% Y - 100% Z" than in "50% Y - 0% Z" (0.34 vs 0.38). Clearly, adding intensity data to prevalence data improves the accuracy of the estimations. The mean absolute errors decrease with the number of trials. 
For example, the $E_{\gamma}$ criterion decreases from 0.62 with 20 trials to 0.32 with 80 trials for the "50% Y - 50% Z" scenario. As noted above, 20 trials is clearly not sufficient to obtain accurate results.
+
+```{r figure7, cache = FALSE, out.width="100%", fig.height = 2.5, echo = FALSE}
+#| label: fig-seven
+#| fig-cap: "Values of $E_{\\gamma}$ (@eq-E_gamma) (**A**), $E_{Ef_6}$ (@eq-E_efficacy) (**B**) and $E_{Ef_{12}}$ (@eq-E_efficacy) (**C**) for the \"0% Y - 100% Z\", \"50% Y - 0% Z\", \"50% Y - 100% Z\" and \"50% Y - 50% Z\" scenarios. The x-axis presents the number of trials and the y-axis presents the absolute error averaged over the 974 simulated datasets. Different colors correspond to different scenarios."
+
+types = c("50% Y - 0% Z", "0% Y - 100% Z", "50% Y - 100% Z", "50% Y - 50% Z")
+res_temp = res_simu %>% filter(Type %in% types)
+col_temp = df_col %>% arrange(type) %>% filter(type %in% types) %>% select(col) %>% as.matrix %>% as.vector
+
+ggplot(res_temp %>% filter(!grepl("best", name))) + geom_point(aes(x = I, y = value, color = Type), size = 1) +
+  geom_line(aes(x = I, y = value, color = Type), size = 0.2) + xlab("Number of trials") +
+  facet_wrap(~ Erreur, nrow = 1, scales = "free") + theme(legend.position = "bottom") +
+  ylab("Mean absolute error") + theme(legend.title = element_blank(), axis.title.x = element_text(margin = margin(t = 5, b = - 8))) +
+  scale_color_manual(values = col_temp)
+```
+
+### Is it better to measure intensity or prevalence in new pest surveys?
+
+In order to optimize the design of new pest surveys that might be conducted in the future, we determine which type of observation should be favored. For that purpose, we compare the results obtained with the "100% Y - 0% Z", "0% Y - 100% Z", "100% Y / 100% Z" and "100% W" scenarios (recall that W represents the unobserved number of aphids on each plant in the sample (@eq-model_W)), for different values of $\alpha_0$, the parameter that determines the average number of infested plants. With $\alpha_0$ = -1, the proportion of infested plants is generally much lower than one, while with $\alpha_0 = 2$, 100% of plants are generally infested. The case $\alpha_0$ = 1 leads to intermediate levels of infestation.
+
+The three parameter sets used to generate the data are given in Table 7 and are labeled A, B and C.
+
+
+
+```{r table7, cache = FALSE, echo = FALSE}
+
+tab7 = data.frame(Parameters = c(
+  # alpha_0 and alpha_1
+  c("\u03B1\u2080", "\u03B1\u2081"),
+
+  # gamma_0
+  c("\u03B3\u2080\u2080", "\u03B3\u2080\u2081", "\u03B3\u2080\u2082", "\u03B3\u2080\u2083"),
+
+  # gamma_1
+  c("\u03B3\u2081\u2080", "\u03B3\u2081\u2081", "\u03B3\u2081\u2082", "\u03B3\u2081\u2083"),
+
+  # sigma_0, eta and chi
+  c("\u03C3\u2080", "\u03B7", "\u03C7")
+  ),
+
+  "Values set A" = c(-1, alpha1, gamma0, gamma1, sig0, eta, chi),
+  "Values set B" = c(1, alpha1, gamma0, gamma1, sig0, eta, chi),
+  "Values set C" = c(2, alpha1, gamma0, gamma1, sig0, eta, chi)
+  )
+
+tab7 %>%
+  t %>%
+  `rownames<-`(c("Set", "A", "B", "C")) %>%
+  kbl(booktabs = TRUE, caption = "Parameters considered for the design of future pest surveys.") %>%
+  column_spec(2, bold = TRUE) %>%
+  kable_styling()
+```
+
+
+@fig-height (A.1, B.1 and C.1) shows the mean absolute error $E_{\gamma}$ (@eq-E_gamma) as a function of the number of trials for the four scenarios "100% Y - 0% Z", "0% Y - 100% Z", "100% Y / 100% Z" and "100% W".
@fig-height (A.2, B.2 and C.2) shows the distributions of the number of infested plants with 40 trials, corresponding to the three values of $\alpha_0$ reported in Table 7.
+In case A (Table 7), the distribution of Z is such that the proportion of infested plants is rarely close to 1 and often lower than 0.5 (@fig-height A.2). In case C, the distribution of Z is such that this proportion is often very close to 1 (100% of plants infested). Case B is intermediate. The accuracy of the estimated values of the model parameters $\gamma$ is better with scenario "100% Y - 0% Z" than with scenario "0% Y - 100% Z", for all numbers of trials. The advantage of "100% Y - 0% Z" is stronger in cases of high pest prevalence (i.e., cases B and C) but very small in the case of low pest prevalence (case A). For example, with 20 trials, the mean absolute error is 27% lower in the scenario "100% Y - 0% Z" than in "0% Y - 100% Z" for parameter set C (0.55 vs. 0.75), 10% lower for parameter set B (0.55 vs. 0.62), and not different for parameter set A (0.64). The "100% W" scenario leads to results similar to those of "100% Y - 0% Z", regardless of $\alpha_0$ and the number of trials. Results obtained with "100% Y / 100% Z" are generally similar to those obtained with "100% Y - 0% Z" and "100% W" but better than those obtained with the scenario "0% Y - 100% Z" in cases B and C. Here again, results show that 20 trials are not sufficient to obtain accurate parameter estimates.
+
+```{r figure8, cache = FALSE, out.width="100%", fig.height = 6, echo = FALSE}
+#| label: fig-height
+#| fig-cap: "Comparison of the \"100% Y - 0% Z\", \"0% Y - 100% Z\", \"100% Y / 100% Z\" and \"100% W\" scenarios according to the distribution of $Z$ and the number of trials, using the $E_{\\gamma}$ criterion (@eq-E_gamma). **A**, **B** and **C** correspond to different $Z$ distributions, which are shown in A.2, B.2 and C.2 (distributions for a number of trials equal to 40). A, B and C respectively correspond to $\\alpha_0 =$ -1, 1 and 2. The details of the simulation parameters are given in Table 7. A.1, B.1 and C.1 represent the absolute error $E_{\\gamma}$ averaged over the 974 simulated datasets as a function of the number of trials. Colors correspond to the different scenarios."
+
+res_simu_q3_1 = readRDS(file = "results/Simulations/res_simu_q3_1.rds");
+res_simu_q3_2 = readRDS(file = "results/Simulations/res_simu_q3_2.rds")
+
+res_simu_q3_1 = res_simu_q3_1 %>% mutate(Type = recode(Type, "100% Y - 100% Z" = "100% Y / 100% Z"))
+
+types = res_simu_q3_1$Type %>% unique
+col = df_col %>% arrange(type) %>% filter(type %in% types) %>% select(col) %>% as.matrix %>% as.vector
+
+g1 = suppressWarnings(ggplot(res_simu_q3_1) + geom_point(aes(x = I, y = Mean_MAE, color = Type), size = 1) +
+  geom_line(aes(x = I, y = Mean_MAE, color = Type), size = 0.2) +
+  facet_wrap(~ alpha0, ncol = 1, labeller = label_wrap_gen(multi_line = FALSE)) +
+  ylab(TeX("Mean relative absolute error for $\\gamma$")) + scale_color_manual(values = col[c(1, 2, 3, 4)]) + theme(plot.margin = margin(l = 0)) +
+  xlab("Number of trials") + theme(legend.position = "bottom", plot.margin = margin(t = 0, l = 0)))
+
+legend = cowplot::get_legend(g1)
+
+g2 = suppressWarnings(ggplot(res_simu_q3_2) + geom_histogram(aes(Z), stat = "count", fill = "#4345a1") + facet_wrap(~ alpha0, ncol = 1) +
+  ylab("Number of observations") + theme(plot.margin = margin(r = 0, l = 15, t = 0)) + xlab("Number of infested beets"))
+
+suppressWarnings(do.call("grid.arrange", c(list(legend), list(g1 + theme(legend.position = "none"), g2), list(ncol = 2, layout_matrix = rbind(c(2, 3), c(1, 1)), heights = c(1, 0.2)))))
+```
+
+# Conclusion
+
+In order to evaluate pest treatment efficacy, numerous trials are conducted to monitor pest prevalence and intensity. Quite often, only one type of data is available and, when both prevalence and intensity are available, they are usually analyzed separately. In this paper, we propose an alternative approach based on a hierarchical statistical model able to analyze intensity and prevalence data simultaneously.
+
+We successfully apply the model to a real dataset including prevalence and intensity data collected to evaluate three pesticide treatments against aphids in sugar beets. The model is fitted to this dataset using a Markov chain Monte Carlo algorithm, and convergence is achieved after a few thousand iterations. Results show that the use of both prevalence and intensity data leads to a substantial reduction of the uncertainty in the parameter estimates, compared to the use of a single type of data.
+
+Results obtained from simulated data confirm that, when pest prevalence and pest intensity are collected separately in different trials, the model parameters are more accurately estimated by combining both prevalence and intensity trials than by using one type of trial only. We also find that, when prevalence data are collected in all trials and intensity data are collected in a subset of trials, the estimates and the pest treatment ranking are more accurate when both types of data are used than when prevalence data are used alone. Moreover, when only one type of observation can be collected in a pest survey or in an experimental trial, our analysis indicates that it is usually better to collect intensity data than prevalence data, especially in situations where all or most of the plants are expected to be infested. Finally, our simulations show that accurate results are unlikely to be obtained with fewer than 40 trials when assessing the efficacy of pest control treatments based on prevalence and intensity data.
+
+Although our framework is illustrated with the comparison of plant pest treatments, it could be applied to other areas of research in the future, in particular to optimize designs used in animal and human epidemiology. The final choice of a design should, of course, also account for local constraints. As the model code is made fully available, it could be used by different institutes to compare many different designs in the future, not only the types of designs considered in our paper. In particular, our model could be used to optimize sample sizes, whose impact depends on the relative importance of within-trial variability compared to between-trial variability.
+
+# Author contributions {.unnumbered}
+
+AF and DM designed the study. AF performed the computations. AF and DM wrote the paper.
+
+# Funding {.unnumbered}
+
+This work was partly funded by the project SEPIM (PNRI) and by the RMT SDMAA.
+
+# Data availability {.unnumbered}
+
+Simulated data and model parameters are available without restriction. The original experimental data may be available upon request.
+
+# Acknowledgements {.unnumbered}
+
+We are grateful to Anabelle Laurent, Elma Raaijmakers, Kathleen Antoons and the institute ITB (https://www.itbfr.org/) for their comments on this project.
+
+We are grateful to the INRAE MIGALE bioinformatics facility (MIGALE, INRAE, 2020. Migale bioinformatics Facility, doi: 10.15454/1.5572390655343293E12) for providing help and/or computing and/or storage resources.
+
+The authors thank the institutes that provided the data, namely the French Institut Technique de la Betterave, the sugar beet organisation of the Netherlands, and the Institut Royal Belge pour l'Amélioration de la Betterave.
+ +# References {.unnumbered} + + diff --git a/references.bib b/references.bib new file mode 100644 index 0000000..703efe8 --- /dev/null +++ b/references.bib @@ -0,0 +1,157 @@ +@article{madden1999sampling, + title={Sampling for plant disease incidence}, + author={Madden, LV and Hughes, G}, + journal={Phytopathology}, + volume={89}, + number={11}, + pages={1088--1103}, + year={1999}, + publisher={Am Phytopath Society} +} + +@article{michel2017framework, + title={A framework based on generalised linear mixed models for analysing pest and disease surveys}, + author={Michel, Lucie and Brun, Francois and Makowski, David}, + journal={Crop Protection}, + volume={94}, + pages={1--12}, + year={2017}, + publisher={Elsevier} +} + + +@article{LAURENT2023106140, + title = {Assessment of non-neonicotinoid treatments against aphids on sugar beets}, + journal = {Crop Protection}, + volume = {164}, + pages = {106140}, + year = {2023}, + issn = {0261-2194}, + doi = {10.1016/j.cropro.2022.106140}, + url = {https://www.sciencedirect.com/science/article/pii/S0261219422002368}, + author = {Laurent, Anabelle and Favrot, Armand and Maupas, Fabienne and Royer, Cédric and Makowski, David}, + keywords = {Pest management, Green aphids, spp., Virus yellows, Sugar beet} +} + + +@article{wood2017environmental, + title={The environmental risks of neonicotinoid pesticides: a review of the evidence post 2013}, + author={Wood, Thomas James and Goulson, Dave}, + journal={Environmental Science and Pollution Research}, + volume={24}, + number={21}, + pages={17285--17325}, + year={2017}, + publisher={Springer} +} + +@article{pisa2015effects, + title={Effects of neonicotinoids and fipronil on non-target invertebrates}, + author={Pisa, Lennard W and Amaral-Rogers, Vanessa and Belzunces, Luc P and Bonmatin, Jean-Marc and Downs, Craig A and Goulson, Dave and Kreutzweiser, David P and Krupke, Christian and Liess, Matthias and McField, Melanie and others}, + journal={Environmental Science and Pollution Research}, + volume={22}, + number={1}, + pages={68--102}, + year={2015}, + publisher={Springer} +} + +@article{shaw2018metrics, + title={Metrics matter: the effect of parasite richness, intensity and prevalence on the evolution of host migration}, + author={Shaw, Allison K and Sherman, Julie and Barker, F Keith and Zuk, Marlene}, + journal={Proceedings of the Royal Society B}, + volume={285}, + number={1891}, + pages={20182147}, + year={2018}, + publisher={The Royal Society} +} + + +@Manual{rjags, + title = {rjags: Bayesian Graphical Models using MCMC}, + author = {Martyn Plummer}, + year = {2022}, + note = {R package version 4-13}, + url = {https://CRAN.R-project.org/package=rjags} +} + +@article{gelman1992inference, + title={Inference from iterative simulation using multiple sequences}, + author={Gelman, Andrew and Rubin, Donald B}, + journal={Statistical science}, + pages={457--472}, + year={1992}, + publisher={JSTOR} +} + +@book{agresti2015foundations, + title={Foundations of linear and generalized linear models}, + author={Agresti, Alan}, + year={2015}, + publisher={John Wiley \& Sons} +} + + +@article{harrison2014using, + title={Using observation-level random effects to model overdispersion in count data in ecology and evolution}, + author={Harrison, Xavier A}, + journal={PeerJ}, + volume={2}, + pages={e616}, + year={2014}, + publisher={PeerJ Inc.} +} + +@article{park2007multivariate, + title={Multivariate Poisson-lognormal models for jointly modeling crash frequency by severity}, + author={Park, Eun Sug and Lord, Dominique}, + 
journal={Transportation Research Record}, + volume={2019}, + number={1}, + pages={1--6}, + year={2007}, + publisher={SAGE Publications Sage CA: Los Angeles, CA} +} + +@article{osiewalski2019joint, + title={Joint modelling of two count variables when one of them can be degenerate}, + author={Osiewalski, Jacek and Marzec, Jerzy}, + journal={Computational Statistics}, + volume={34}, + pages={153--171}, + year={2019}, + publisher={Springer} +} + + +@article{gurmu2000generalized, + title={Generalized bivariate count data regression models}, + author={Gurmu, Shiferaw and Elder, John}, + journal={Economics Letters}, + volume={68}, + number={1}, + pages={31--36}, + year={2000}, + publisher={Elsevier} +} + + +@book{gelman1995bayesian, + title={Bayesian data analysis}, + author={Gelman, Andrew and Carlin, John B and Stern, Hal S and Rubin, Donald B}, + year={1995}, + publisher={Chapman and Hall/CRC} +} + + + + + + + + + + + + diff --git a/renv.lock b/renv.lock new file mode 100644 index 0000000..6a06a76 --- /dev/null +++ b/renv.lock @@ -0,0 +1,1524 @@ +{ + "R": { + "Version": "4.2.2", + "Repositories": [ + { + "Name": "CRAN", + "URL": "https://packagemanager.posit.co/cran/latest" + } + ] + }, + "Packages": { + "DBI": { + "Package": "DBI", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "dcd1743af4336156873e3ce3c950b8b9" + }, + "MASS": { + "Package": "MASS", + "Version": "7.3-60", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats", + "utils" + ], + "Hash": "a56a6365b3fa73293ea8d084be0d9bb0" + }, + "Matrix": { + "Package": "Matrix", + "Version": "1.5-1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "grid", + "lattice", + "methods", + "stats", + "utils" + ], + "Hash": "539dc0c0c05636812f1080f473d2c177" + }, + "R6": { + "Package": "R6", + "Version": "2.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "470851b6d5d0ac559e9d01bb352b4021" + }, + "RColorBrewer": { + "Package": "RColorBrewer", + "Version": "1.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "45f0398006e83a5b10b72a90663d8d8c" + }, + "askpass": { + "Package": "askpass", + "Version": "1.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "sys" + ], + "Hash": "e8a22846fff485f0be3770c2da758713" + }, + "assertthat": { + "Package": "assertthat", + "Version": "0.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "tools" + ], + "Hash": "50c838a310445e954bc13f26f26a6ecf" + }, + "backports": { + "Package": "backports", + "Version": "1.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c39fbec8a30d23e721980b8afb31984c" + }, + "base64enc": { + "Package": "base64enc", + "Version": "0.1-3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "543776ae6848fde2f48ff3816d0628bc" + }, + "bit": { + "Package": "bit", + "Version": "4.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "f36715f14d94678eea9933af927bc15d" + }, + "bit64": { + "Package": "bit64", + "Version": "4.0.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bit", + "methods", + "stats", + "utils" + ], + "Hash": "9fe98599ca456d6552421db0d6772d8f" + }, + "blob": { + "Package": "blob", + 
"Version": "1.2.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "methods", + "rlang", + "vctrs" + ], + "Hash": "10d231579bc9c06ab1c320618808d4ff" + }, + "broom": { + "Package": "broom", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "backports", + "dplyr", + "ellipsis", + "generics", + "ggplot2", + "glue", + "methods", + "purrr", + "rlang", + "stringr", + "tibble", + "tidyr" + ], + "Hash": "fe13cb670e14da57fd7a466578db8ce5" + }, + "bslib": { + "Package": "bslib", + "Version": "0.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "htmltools", + "jquerylib", + "jsonlite", + "rlang", + "sass" + ], + "Hash": "56ae7e1987b340186a8a5a157c2ec358" + }, + "callr": { + "Package": "callr", + "Version": "3.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R6", + "processx", + "utils" + ], + "Hash": "461aa75a11ce2400245190ef5d3995df" + }, + "cellranger": { + "Package": "cellranger", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "rematch", + "tibble" + ], + "Hash": "f61dbaec772ccd2e17705c1e872e9e7c" + }, + "cli": { + "Package": "cli", + "Version": "3.6.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "3177a5a16c243adc199ba33117bd9657" + }, + "clipr": { + "Package": "clipr", + "Version": "0.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "3f038e5ac7f41d4ac41ce658c85e3042" + }, + "coda": { + "Package": "coda", + "Version": "0.19-4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "lattice" + ], + "Hash": "24b6d006b8b2343876cf230687546932" + }, + "colorspace": { + "Package": "colorspace", + "Version": "2.1-0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "methods", + "stats" + ], + "Hash": "f20c47fd52fae58b4e377c37bb8c335b" + }, + "cowplot": { + "Package": "cowplot", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "ggplot2", + "grDevices", + "grid", + "gtable", + "methods", + "rlang", + "scales" + ], + "Hash": "b418e8423699d11c7f2087c2bfd07da2" + }, + "cpp11": { + "Package": "cpp11", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "fa53ce256cd280f468c080a58ea5ba8c" + }, + "crayon": { + "Package": "crayon", + "Version": "1.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "grDevices", + "methods", + "utils" + ], + "Hash": "8dc45fd8a1ee067a92b85ef274e66d6a" + }, + "curl": { + "Package": "curl", + "Version": "4.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "022c42d49c28e95d69ca60446dbabf88" + }, + "data.table": { + "Package": "data.table", + "Version": "1.14.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "36b67b5adf57b292923f5659f5f0c853" + }, + "dbplyr": { + "Package": "dbplyr", + "Version": "2.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "DBI", + "R", + "R6", + "assertthat", + "blob", + "dplyr", + "ellipsis", + "glue", + "lifecycle", + "magrittr", + "methods", + "purrr", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs", + "withr" + ], + "Hash": "1f37fa4ab2f5f7eded42f78b9a887182" + }, + "digest": { + "Package": "digest", + 
"Version": "0.6.29", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "cf6b206a045a684728c3267ef7596190" + }, + "dplyr": { + "Package": "dplyr", + "Version": "1.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "cli", + "generics", + "glue", + "lifecycle", + "magrittr", + "methods", + "pillar", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "dea6970ff715ca541c387de363ff405e" + }, + "dtplyr": { + "Package": "dtplyr", + "Version": "1.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "crayon", + "data.table", + "dplyr", + "ellipsis", + "glue", + "lifecycle", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ], + "Hash": "f5d195cd5fcc0a77499d9da698ef2ea3" + }, + "ellipsis": { + "Package": "ellipsis", + "Version": "0.3.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "rlang" + ], + "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" + }, + "evaluate": { + "Package": "evaluate", + "Version": "0.15", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "699a7a93d08c962d9f8950b2d7a227f1" + }, + "fansi": { + "Package": "fansi", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "utils" + ], + "Hash": "83a8afdbe71839506baa9f90eebad7ec" + }, + "farver": { + "Package": "farver", + "Version": "2.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c98eb5133d9cb9e1622b8691487f11bb" + }, + "fastmap": { + "Package": "fastmap", + "Version": "1.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" + }, + "forcats": { + "Package": "forcats", + "Version": "0.5.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "ellipsis", + "magrittr", + "rlang", + "tibble" + ], + "Hash": "81c3244cab67468aac4c60550832655d" + }, + "fs": { + "Package": "fs", + "Version": "1.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "7c89603d81793f0d5486d91ab1fc6f1d" + }, + "gargle": { + "Package": "gargle", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "fs", + "glue", + "httr", + "jsonlite", + "rappdirs", + "rlang", + "rstudioapi", + "stats", + "utils", + "withr" + ], + "Hash": "9d234e6a87a6f8181792de6dc4a00e39" + }, + "generics": { + "Package": "generics", + "Version": "0.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "177475892cf4a55865868527654a7741" + }, + "ggplot2": { + "Package": "ggplot2", + "Version": "3.3.6", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "MASS", + "R", + "digest", + "glue", + "grDevices", + "grid", + "gtable", + "isoband", + "mgcv", + "rlang", + "scales", + "stats", + "tibble", + "withr" + ], + "Hash": "0fb26d0674c82705c6b701d1a61e02ea" + }, + "glue": { + "Package": "glue", + "Version": "1.6.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e" + }, + "googledrive": { + "Package": "googledrive", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "gargle", + "glue", + "httr", + "jsonlite", + "lifecycle", + "magrittr", + "pillar", + "purrr", + "rlang", + "tibble", + 
"utils", + "uuid", + "vctrs", + "withr" + ], + "Hash": "c3a25adbbfbb03f12e6f88c5fb1f3024" + }, + "googlesheets4": { + "Package": "googlesheets4", + "Version": "1.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cellranger", + "cli", + "curl", + "gargle", + "glue", + "googledrive", + "httr", + "ids", + "magrittr", + "methods", + "purrr", + "rematch2", + "rlang", + "tibble", + "utils", + "vctrs" + ], + "Hash": "9a6564184dc4a81daea4f1d7ce357c6a" + }, + "gridExtra": { + "Package": "gridExtra", + "Version": "2.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "grDevices", + "graphics", + "grid", + "gtable", + "utils" + ], + "Hash": "7d7f283939f563670a697165b2cf5560" + }, + "gtable": { + "Package": "gtable", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grid" + ], + "Hash": "ac5c6baf7822ce8732b343f14c072c4d" + }, + "haven": { + "Package": "haven", + "Version": "2.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "cpp11", + "forcats", + "hms", + "lifecycle", + "methods", + "readr", + "rlang", + "tibble", + "tidyselect", + "vctrs" + ], + "Hash": "e3058e4ac77f4fa686f68a1838d5b715" + }, + "highr": { + "Package": "highr", + "Version": "0.9", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "xfun" + ], + "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" + }, + "hms": { + "Package": "hms", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "ellipsis", + "lifecycle", + "methods", + "pkgconfig", + "rlang", + "vctrs" + ], + "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" + }, + "htmltools": { + "Package": "htmltools", + "Version": "0.5.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "base64enc", + "digest", + "fastmap", + "grDevices", + "rlang", + "utils" + ], + "Hash": "526c484233f42522278ab06fb185cb26" + }, + "httr": { + "Package": "httr", + "Version": "1.4.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "curl", + "jsonlite", + "mime", + "openssl" + ], + "Hash": "88d1b310583777edf01ccd1216fb0b2b" + }, + "ids": { + "Package": "ids", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "openssl", + "uuid" + ], + "Hash": "99df65cfef20e525ed38c3d2577f7190" + }, + "isoband": { + "Package": "isoband", + "Version": "0.2.5", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "grid", + "utils" + ], + "Hash": "7ab57a6de7f48a8dc84910d1eca42883" + }, + "jquerylib": { + "Package": "jquerylib", + "Version": "0.1.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "htmltools" + ], + "Hash": "5aab57a3bd297eee1c1d862735972182" + }, + "jsonlite": { + "Package": "jsonlite", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "methods" + ], + "Hash": "d07e729b27b372429d42d24d503613a0" + }, + "kableExtra": { + "Package": "kableExtra", + "Version": "1.3.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "digest", + "glue", + "grDevices", + "graphics", + "htmltools", + "knitr", + "magrittr", + "rmarkdown", + "rstudioapi", + "rvest", + "scales", + "stats", + "stringr", + "svglite", + "tools", + "viridisLite", + "webshot", + "xml2" + ], + "Hash": "49b625e6aabe4c5f091f5850aba8ff78" + }, + "knitr": { + "Package": "knitr", + "Version": "1.39", + "Source": "Repository", + 
"Repository": "CRAN", + "Requirements": [ + "R", + "evaluate", + "highr", + "methods", + "stringr", + "tools", + "xfun", + "yaml" + ], + "Hash": "029ab7c4badd3cf8af69016b2ba27493" + }, + "labeling": { + "Package": "labeling", + "Version": "0.4.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "graphics", + "stats" + ], + "Hash": "3d5108641f47470611a32d0bdf357a72" + }, + "latex2exp": { + "Package": "latex2exp", + "Version": "0.9.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "magrittr", + "stringr" + ], + "Hash": "e98c8626b5142cba49800be7abc4021f" + }, + "lattice": { + "Package": "lattice", + "Version": "0.20-45", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "grid", + "stats", + "utils" + ], + "Hash": "b64cdbb2b340437c4ee047a1f4c4377b" + }, + "lifecycle": { + "Package": "lifecycle", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "rlang" + ], + "Hash": "001cecbeac1cff9301bdc3775ee46a86" + }, + "lubridate": { + "Package": "lubridate", + "Version": "1.8.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11", + "generics", + "methods" + ], + "Hash": "2ff5eedb6ee38fb1b81205c73be1be5a" + }, + "magrittr": { + "Package": "magrittr", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "7ce2733a9826b3aeb1775d56fd305472" + }, + "mgcv": { + "Package": "mgcv", + "Version": "1.8-41", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "Matrix", + "R", + "graphics", + "methods", + "nlme", + "splines", + "stats", + "utils" + ], + "Hash": "6b3904f13346742caa3e82dd0303d4ad" + }, + "mime": { + "Package": "mime", + "Version": "0.12", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "tools" + ], + "Hash": "18e9c28c1d3ca1560ce30658b22ce104" + }, + "modelr": { + "Package": "modelr", + "Version": "0.1.8", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "broom", + "magrittr", + "purrr", + "rlang", + "tibble", + "tidyr", + "tidyselect", + "vctrs" + ], + "Hash": "9fd59716311ee82cba83dc2826fc5577" + }, + "munsell": { + "Package": "munsell", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "colorspace", + "methods" + ], + "Hash": "6dfe8bf774944bd5595785e3229d8771" + }, + "nlme": { + "Package": "nlme", + "Version": "3.1-162", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "graphics", + "lattice", + "stats", + "utils" + ], + "Hash": "0984ce8da8da9ead8643c5cbbb60f83e" + }, + "openssl": { + "Package": "openssl", + "Version": "2.0.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "askpass" + ], + "Hash": "cf4329aac12c2c44089974559c18e446" + }, + "pillar": { + "Package": "pillar", + "Version": "1.9.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "cli", + "fansi", + "glue", + "lifecycle", + "rlang", + "utf8", + "utils", + "vctrs" + ], + "Hash": "15da5a8412f317beeee6175fbc76f4bb" + }, + "pkgconfig": { + "Package": "pkgconfig", + "Version": "2.0.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "utils" + ], + "Hash": "01f28d4278f15c76cddbea05899c5d6f" + }, + "prettyunits": { + "Package": "prettyunits", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" + 
}, + "processx": { + "Package": "processx", + "Version": "3.5.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "ps", + "utils" + ], + "Hash": "8bbae1a548d0d3fdf6647bdd9d35bf6d" + }, + "progress": { + "Package": "progress", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R6", + "crayon", + "hms", + "prettyunits" + ], + "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" + }, + "ps": { + "Package": "ps", + "Version": "1.7.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "eef74b13f32cae6bb0d495e53317c44c" + }, + "purrr": { + "Package": "purrr", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "lifecycle", + "magrittr", + "rlang", + "vctrs" + ], + "Hash": "d71c815267c640f17ddbf7f16144b4bb" + }, + "rappdirs": { + "Package": "rappdirs", + "Version": "0.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "5e3c5dc0b071b21fa128676560dbe94d" + }, + "readr": { + "Package": "readr", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "cli", + "clipr", + "cpp11", + "crayon", + "hms", + "lifecycle", + "methods", + "rlang", + "tibble", + "tzdb", + "utils", + "vroom" + ], + "Hash": "9c59de1357dc209868b5feb5c9f0fe2f" + }, + "readxl": { + "Package": "readxl", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cellranger", + "cpp11", + "progress", + "tibble", + "utils" + ], + "Hash": "170c35f745563bb307e963bde0197e4f" + }, + "rematch": { + "Package": "rematch", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "c66b930d20bb6d858cd18e1cebcfae5c" + }, + "rematch2": { + "Package": "rematch2", + "Version": "2.1.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "tibble" + ], + "Hash": "76c9e04c712a05848ae7a23d2f170a40" + }, + "renv": { + "Package": "renv", + "Version": "1.0.3", + "Source": "Repository", + "Repository": "RSPM", + "Requirements": [ + "utils" + ], + "Hash": "41b847654f567341725473431dd0d5ab" + }, + "reprex": { + "Package": "reprex", + "Version": "2.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "callr", + "cli", + "clipr", + "fs", + "glue", + "knitr", + "rlang", + "rmarkdown", + "rstudioapi", + "utils", + "withr" + ], + "Hash": "911d101becedc0fde495bd910984bdc8" + }, + "rjags": { + "Package": "rjags", + "Version": "4-13", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "coda" + ], + "Hash": "0fd952ba9f2ff8006975e5ba1286d00a" + }, + "rlang": { + "Package": "rlang", + "Version": "1.1.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "utils" + ], + "Hash": "a85c767b55f0bf9b7ad16c6d7baee5bb" + }, + "rmarkdown": { + "Package": "rmarkdown", + "Version": "2.14", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bslib", + "evaluate", + "htmltools", + "jquerylib", + "jsonlite", + "knitr", + "methods", + "stringr", + "tinytex", + "tools", + "utils", + "xfun", + "yaml" + ], + "Hash": "31b60a882fabfabf6785b8599ffeb8ba" + }, + "rstudioapi": { + "Package": "rstudioapi", + "Version": "0.13", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "06c85365a03fdaf699966cc1d3cf53ea" + }, + "rvest": { + "Package": "rvest", + "Version": "1.0.2", + "Source": "Repository", + 
"Repository": "CRAN", + "Requirements": [ + "R", + "httr", + "lifecycle", + "magrittr", + "rlang", + "selectr", + "tibble", + "xml2" + ], + "Hash": "bb099886deffecd6f9b298b7d4492943" + }, + "sass": { + "Package": "sass", + "Version": "0.4.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R6", + "fs", + "htmltools", + "rappdirs", + "rlang" + ], + "Hash": "f37c0028d720bab3c513fd65d28c7234" + }, + "scales": { + "Package": "scales", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "RColorBrewer", + "farver", + "labeling", + "lifecycle", + "munsell", + "rlang", + "viridisLite" + ], + "Hash": "6e8750cdd13477aa440d453da93d5cac" + }, + "selectr": { + "Package": "selectr", + "Version": "0.4-2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "R6", + "methods", + "stringr" + ], + "Hash": "3838071b66e0c566d55cc26bd6e27bf4" + }, + "stringi": { + "Package": "stringi", + "Version": "1.7.6", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "stats", + "tools", + "utils" + ], + "Hash": "bba431031d30789535745a9627ac9271" + }, + "stringr": { + "Package": "stringr", + "Version": "1.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "glue", + "magrittr", + "stringi" + ], + "Hash": "0759e6b6c0957edb1311028a49a35e76" + }, + "svglite": { + "Package": "svglite", + "Version": "2.1.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11", + "systemfonts" + ], + "Hash": "68dfdf211af6aa4e5f050f064f64d401" + }, + "sys": { + "Package": "sys", + "Version": "3.4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "b227d13e29222b4574486cfcbde077fa" + }, + "systemfonts": { + "Package": "systemfonts", + "Version": "1.0.4", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "90b28393209827327de889f49935140a" + }, + "tibble": { + "Package": "tibble", + "Version": "3.2.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "fansi", + "lifecycle", + "magrittr", + "methods", + "pillar", + "pkgconfig", + "rlang", + "utils", + "vctrs" + ], + "Hash": "a84e2cc86d07289b3b6f5069df7a004c" + }, + "tidyr": { + "Package": "tidyr", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11", + "dplyr", + "ellipsis", + "glue", + "lifecycle", + "magrittr", + "purrr", + "rlang", + "tibble", + "tidyselect", + "utils", + "vctrs" + ], + "Hash": "d8b95b7fee945d7da6888cf7eb71a49c" + }, + "tidyselect": { + "Package": "tidyselect", + "Version": "1.2.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang", + "vctrs", + "withr" + ], + "Hash": "79540e5fcd9e0435af547d885f184fd5" + }, + "tidyverse": { + "Package": "tidyverse", + "Version": "1.3.1", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "broom", + "cli", + "crayon", + "dbplyr", + "dplyr", + "dtplyr", + "forcats", + "ggplot2", + "googledrive", + "googlesheets4", + "haven", + "hms", + "httr", + "jsonlite", + "lubridate", + "magrittr", + "modelr", + "pillar", + "purrr", + "readr", + "readxl", + "reprex", + "rlang", + "rstudioapi", + "rvest", + "stringr", + "tibble", + "tidyr", + "xml2" + ], + "Hash": "fc4c72b6ae9bb283416bd59a3303bbab" + }, + "tinytex": { + "Package": "tinytex", + "Version": "0.38", + "Source": "Repository", + "Repository": "CRAN", + 
"Requirements": [ + "xfun" + ], + "Hash": "759d047596ac173433985deddf313450" + }, + "tzdb": { + "Package": "tzdb", + "Version": "0.3.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cpp11" + ], + "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e" + }, + "utf8": { + "Package": "utf8", + "Version": "1.2.2", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "c9c462b759a5cc844ae25b5942654d13" + }, + "uuid": { + "Package": "uuid", + "Version": "1.1-0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "f1cb46c157d080b729159d407be83496" + }, + "vctrs": { + "Package": "vctrs", + "Version": "0.6.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "cli", + "glue", + "lifecycle", + "rlang" + ], + "Hash": "d0ef2856b83dc33ea6e255caf6229ee2" + }, + "viridisLite": { + "Package": "viridisLite", + "Version": "0.4.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R" + ], + "Hash": "55e157e2aa88161bdb0754218470d204" + }, + "vroom": { + "Package": "vroom", + "Version": "1.5.7", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "bit64", + "cli", + "cpp11", + "crayon", + "glue", + "hms", + "lifecycle", + "methods", + "progress", + "rlang", + "stats", + "tibble", + "tidyselect", + "tzdb", + "vctrs", + "withr" + ], + "Hash": "976507b5a105bc3bdf6a5a5f29e0684f" + }, + "webshot": { + "Package": "webshot", + "Version": "0.5.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "callr", + "jsonlite", + "magrittr" + ], + "Hash": "7261ab7f98e97c771217e6b87c085d6e" + }, + "withr": { + "Package": "withr", + "Version": "2.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "grDevices", + "graphics", + "stats" + ], + "Hash": "c0e49a9760983e81e55cdd9be92e7182" + }, + "xfun": { + "Package": "xfun", + "Version": "0.30", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "stats", + "tools" + ], + "Hash": "e83f48136b041845e50a6658feffb197" + }, + "xml2": { + "Package": "xml2", + "Version": "1.3.3", + "Source": "Repository", + "Repository": "CRAN", + "Requirements": [ + "R", + "methods" + ], + "Hash": "40682ed6a969ea5abfd351eb67833adc" + }, + "yaml": { + "Package": "yaml", + "Version": "2.3.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "458bb38374d73bf83b1bb85e353da200" + } + } +} diff --git a/results/Real_data/res.Y.nadapt.150000.niter.3e+05.rds b/results/Real_data/res.Y.nadapt.150000.niter.3e+05.rds new file mode 100644 index 0000000..8be03f4 Binary files /dev/null and b/results/Real_data/res.Y.nadapt.150000.niter.3e+05.rds differ diff --git a/results/Real_data/res.Yhalf.nadapt.150000.niter.3e+05.rds b/results/Real_data/res.Yhalf.nadapt.150000.niter.3e+05.rds new file mode 100644 index 0000000..d2aa890 Binary files /dev/null and b/results/Real_data/res.Yhalf.nadapt.150000.niter.3e+05.rds differ diff --git a/results/Real_data/res.YhalfZhalf.nadapt.150000.niter.3e+05.rds b/results/Real_data/res.YhalfZhalf.nadapt.150000.niter.3e+05.rds new file mode 100644 index 0000000..8c8494e Binary files /dev/null and b/results/Real_data/res.YhalfZhalf.nadapt.150000.niter.3e+05.rds differ diff --git a/results/Real_data/res.Zhalf.nadapt.150000.niter.3e+05.rds b/results/Real_data/res.Zhalf.nadapt.150000.niter.3e+05.rds new file mode 100644 index 0000000..c0bd8ab Binary files /dev/null and b/results/Real_data/res.Zhalf.nadapt.150000.niter.3e+05.rds 
differ diff --git a/results/Simulations/df_col_simu.rds b/results/Simulations/df_col_simu.rds new file mode 100644 index 0000000..4f85917 Binary files /dev/null and b/results/Simulations/df_col_simu.rds differ diff --git a/results/Simulations/res_simu_q3_1.rds b/results/Simulations/res_simu_q3_1.rds new file mode 100644 index 0000000..1790c2f Binary files /dev/null and b/results/Simulations/res_simu_q3_1.rds differ diff --git a/results/Simulations/res_simu_q3_2.rds b/results/Simulations/res_simu_q3_2.rds new file mode 100644 index 0000000..df8e6be Binary files /dev/null and b/results/Simulations/res_simu_q3_2.rds differ diff --git a/results/Simulations/res_simu_sigma_1.87.rds b/results/Simulations/res_simu_sigma_1.87.rds new file mode 100644 index 0000000..d6e5d11 Binary files /dev/null and b/results/Simulations/res_simu_sigma_1.87.rds differ diff --git a/scripts/inference_example_real_data.R b/scripts/inference_example_real_data.R new file mode 100644 index 0000000..99a83c5 --- /dev/null +++ b/scripts/inference_example_real_data.R @@ -0,0 +1,107 @@ + + +# Jags code for the model ################################################# +modelstringYZ = " + model { + + # Likelihood ##################################################### + for (i in 1:Q){ + Y[i] ~ dpois(N[i] * lb[i]) + Z[i] ~ dbinom(pi[i], N[i]) + + log(lb[i]) = beta0[ID[i]] + gamma0[INSEC[i]] + (alpha1 + + gamma1[INSEC[i]]) * TIME[i] + u[ST[i]] + epsi[i] + + pi[i] = 1 - exp(- lb[i]) + epsi[i] ~ dnorm(0, pi_eps) + } + + for (j in 1:K){ + beta0[j] ~ dnorm(alpha0, tau0) + } + + for (c in 1:M){ + u[c] ~ dnorm(0, invchi) + } + + gamma0[1] = 0 + gamma1[1] = 0 + + # Priors ######################################################### + for (s in 2:L){ + gamma0[s] ~ dnorm(0, 0.001) + gamma1[s] ~ dnorm(0, 0.001) + } + + alpha0 ~ dnorm(0, 0.001) + alpha1 ~ dnorm(0, 0.001) + sigma0 ~ dunif(0, 10) + chi ~ dunif(0, 10) + eta ~ dunif(0, 10) + + # Derived Quantities ############################################# + tau0 = pow(sigma0, -2) + invchi = pow(chi, -2) + pi_eps = pow(eta, -2) + + for (h in 2:L){ + for(t in 1 : T){ + Eff[h, t] = (1 - exp(gamma0[h] + gamma1[h] * + TIME_unique[t])) * 100 + } + } + } +" + +writeLines(modelstringYZ, con = "jags_models/modelYZ.txt") +############################################################################ + + +# Inference example on the extract of the real dataset ##################### +real_data_extract = readRDS(file = "data/real_data_extract.rds") + +data = real_data_extract %>% + mutate(tscaled = scale(DPT), st = paste(ID, Insecticide)) + +# Building scenarios ------------------------------------------------------- +scenarioY = data %>% mutate(Z = NA) +scenarioYhalfZhalf = data %>% mutate(Y = ifelse(ID == "2020 - B1A97", Y, NA), + Z = ifelse(ID == "2020 - B1A97", NA, Z)) +scenarioYhalf = data %>% filter(ID == "2020 - B1A97") %>% mutate(Z = NA) +scenarioZhalf = data %>% filter(ID == "2020 - u1CwE") %>% mutate(Y = NA) +# --------------------------------------------------------------------------- + +data = scenarioYhalfZhalf + +Y = data$Y; Q = length(Y); N = data$N; Z = data$Z + +ID = as.numeric(as.factor(as.character(data$ID))); +INSEC = as.numeric(as.factor(as.character(data$Insecticide))); +TIME = as.numeric(data$tscaled); +ST = as.numeric(as.factor(as.character(data$st))); + +K = length(unique(ID)); L = length(unique(INSEC)); +M = length(unique(ST)); + +df_TIME = suppressMessages(data %>% + group_by(DPT, tscaled) %>% + summarise(n = n()) %>% as.data.frame) + +TIME_unique = approx(df_TIME$DPT, 
df_TIME$tscaled, xout = c(6, 12))$y; +T = length(unique(TIME_unique)) + +data_jags = list( + "Y" = Y, "Z" = Z, "Q" = Q, "ID" = ID, "INSEC" = INSEC, + "TIME" = TIME, "ST" = ST, "K" = K, "L" = L, "M" = M, + "N" = N, "T" = T, "TIME_unique" = TIME_unique +) + +nadapt = 2000; niter = 2000 + +model <- jags.model("jags_models/modelYZ.txt", data = data_jags, + n.chains = 2, n.adapt = nadapt) + +samples <- coda.samples(model, + variable.names = c("gamma0", "gamma1", "Eff"), + n.iter = niter, thin = 10) +############################################################################ \ No newline at end of file diff --git a/scripts/simulations.R b/scripts/simulations.R new file mode 100644 index 0000000..64f7dd9 --- /dev/null +++ b/scripts/simulations.R @@ -0,0 +1,147 @@ + + +J = 4; T = 3; N = 10; K = 4; + +Block = c(1 : K); Band = c(1 : J); +Beet = c(1 : N); DPT = seq(0, 12, length.out = T); + +alpha0 = 0.5; gamma0 = c(0, - 0.13, - 1.13, - 1.24); +alpha1 = 0.16; gamma1 = c(0, 0.24, - 0.14, - 0.15); +sig0 = 1.87; chi = 0.27; eta = 0.98 + +############################################################################ + +# Building scenarios ####################################################### +seed = 1; I = 10 +data = suppressMessages(simu_data(seed = seed, I = I)) + +scenarios = list( + Y = data$dataYZ %>% mutate(Z = NA), + Z = data$dataYZ %>% mutate(Y = NA), + YhalfZhalf = data$dataYZ %>% mutate(Y = ifelse(ID <= (I / 2), NA, Y), + Z = ifelse(ID > (I / 2), NA, Z)), + YhalfZ = data$dataYZ %>% mutate(Y = ifelse(ID <= (I / 2), NA, Y)), + YZ = data$dataYZ, + W = data$dataW +) +############################################################################# + + +# Inference ################################################################# +nadapt = 2000; niter = 2000 + +res_inf = NULL + +# Inference YZ -------------------------------------------------------------- +ID = as.numeric(as.factor(as.character(scenarios$Y$ID))); +INSEC = as.numeric(as.factor(as.character(scenarios$Y$Insecticide))); +TIME = as.numeric(scenarios$Y$tscaled); +ST = as.numeric(as.factor(as.character(scenarios$Y$st))); + +K = length(unique(ID)); +L = length(unique(INSEC)); +M = length(unique(ST)) + +TIME_unique = unique(scenarios$Y$tscaled)[2 : 3] +T = length(unique(TIME_unique)) + +for(i in (1 : 5)){ + Y = scenarios[[i]]$Y; Z = scenarios[[i]]$Z; + N = scenarios[[i]]$N; Q = length(Y) + + data_jags = list( + "Y" = Y, "Z" = Z, "Q" = Q, "ID" = ID, "INSEC" = INSEC, + "TIME" = TIME, "ST" = ST, "K" = K, "L" = L, "M" = M, + "N" = N, "T" = T, "TIME_unique" = TIME_unique + ) + + model <- jags.model("jags_models/modelYZ.txt", data = data_jags, + n.chains = 2, n.adapt = nadapt) + + samples <- coda.samples(model, + variable.names = c("gamma0", "gamma1", "Eff"), + n.iter = niter, thin = 10) + + bind = list(samples); + names(bind) = paste("samples", names(scenarios)[i], sep = "_") + + res_inf = res_inf %>% append(bind) +} + +# Inference W --------------------------------------------------------------- +ID = as.numeric(as.factor(as.character(scenarios$W$ID))); +INSEC = as.numeric(as.factor(as.character(scenarios$W$Insecticide))); +TIME = as.numeric(scenarios$W$tscaled); +ST = as.numeric(as.factor(as.character(scenarios$W$st))); +SBIT = as.numeric(as.factor(as.character(scenarios$W$sbit))) + +K = length(unique(ID)); L = length(unique(INSEC)); +M = length(unique(ST)); X = length(unique(SBIT)) + +TIME_unique = unique(scenarios$W$tscaled)[2 : 3] +T = length(unique(TIME_unique)) + +W = scenarios$W$W; Q = length(W) + +data_jags = list( + "W" = W, "Q" = Q, "ID" 
= ID, "INSEC" = INSEC, + "TIME" = TIME, "ST" = ST, "SBIT" = SBIT, "K" = K, "L" = L, + "M" = M, "X" = X, "T" = T, "TIME_unique" = TIME_unique +) + +model <- jags.model("jags_models/modelW.txt", data = data_jags, + n.chains = 2, n.adapt = nadapt) + +samples <- coda.samples(model, + variable.names = c("gamma0", "gamma1", "Eff"), + n.iter = niter, thin = 10) + +res_inf = res_inf %>% append(list("samples_W" = samples)) +############################################################################ + + +# Formatting results ####################################################### +t = scenarios$Y$tscaled %>% unique +Eff_6_true = (1 - exp(gamma0[2 : J] + gamma1[2 : J] * t[2])) * 100 +Eff_12_true = (1 - exp(gamma0[2 : J] + gamma1[2 : J] * t[3])) * 100 +truth = c(Eff_6_true, Eff_12_true, gamma0[2 : J], gamma1[2 : J]) + +n_scenarios = length(scenarios) + +esti = lapply( + res_inf, function(x) summary(x)$statistics %>% as.data.frame %>% + rownames_to_column %>% + filter(!(grepl("gamma", rowname) & Mean == 0)) %>% + select(Mean) %>% as.matrix %>% as.vector +) + +b_inf = lapply( + res_inf, function(x) summary(x)$quantiles %>% as.data.frame %>% + rownames_to_column %>% + filter(!(grepl("gamma", rowname) & `2.5%` == 0)) %>% + select(`2.5%`) %>% as.matrix %>% as.vector +) + +b_sup = lapply( + res_inf, function(x) summary(x)$quantiles %>% as.data.frame %>% + rownames_to_column %>% + filter(!(grepl("gamma", rowname) & `2.5%` == 0)) %>% + select(`97.5%`) %>% as.matrix %>% as.vector +) + +parameters = summary(res_inf[[1]])$statistics %>% as.data.frame %>% + rownames_to_column %>% + filter(!(grepl("gamma", rowname) & Mean == 0)) %>% + select(rowname) %>% as.matrix %>% as.vector + +res = do.call(rbind, + lapply(c(1 : n_scenarios), + function(x) + data.frame(Truth = truth, Scenario = names(scenarios)[x], + I = I, seed = seed, + value = esti[[x]], parameters = parameters, + b_inf = b_inf[[x]], b_sup = b_sup[[x]] + ) + ) +) +############################################################################ \ No newline at end of file
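+
+# Illustrative follow-up (a sketch, not part of the original script): summarise
+# the relative absolute error of the posterior means for the gamma parameters,
+# by scenario, from the `res` data frame built above. Assumes dplyr is attached
+# (the script already relies on it for %>%, mutate, group_by, etc.). The column
+# name Mean_MAE mirrors the quantity plotted in the paper but is computed here
+# only as an example.
+res_mae = res %>%
+  filter(grepl("gamma", parameters)) %>%
+  mutate(rel_abs_err = abs(value - Truth) / abs(Truth)) %>%
+  group_by(Scenario) %>%
+  summarise(Mean_MAE = mean(rel_abs_err), .groups = "drop")
+############################################################################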