diff --git a/R/salmonella_outbreak_de_1998_doc.R b/R/salmonella_outbreak_de_1998_doc.R new file mode 100644 index 0000000..b438fae --- /dev/null +++ b/R/salmonella_outbreak_de_1998_doc.R @@ -0,0 +1,25 @@ +#' @title Linelist for fictionalized salmonella outbreak in Stegen, Germany +#' +#' @description +#' Linelist data for a fictionalized salmonella outbreak that occurred in a high school +#' in Stegen, Germany (1998) +#' +#' @details +#' This is a fake case linelist with individual level data +#' on clinical and epidemiological information, with a focus on foods +#' eaten at a cafeteria (exposures) +#' +#' @source +#' This case study was first designed by Alain Moren and Gilles Desve for EPIET. It is based on an investigation conducted by Anja Hauri, RKI, Berlin, 1998. +#' Data is fictional and was inspired by Nygren et al. Tick-borne encephalitis: acute clinical manifestations and severity in 581 cases from Germany, 2018-2020. Journal of Infection. 2023 Apr 1;86(4):369-75 +#' +#' This is version 1.0 for the appliedepidata package (earlier data versions used by EPIET are not considered). +#' +#' This dataset is in English. 
+#' +#' This dataset is licensed under [GPL3](https://www.gnu.org/licenses/gpl-3.0.html) +#' +#' @format A data frame with 291 rows and 20 columns +#' +#' @docType data +"stegentira_data" \ No newline at end of file diff --git a/_pkgdown.yml b/_pkgdown.yml index 1e1ca97..bf99a71 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -36,3 +36,7 @@ reference: contents: - mpox_linelist - mpox_aggregate_table + +- subtitle: Fictional linelist data from Salmonella outbreak in Stegen, Germany (1998) - English + contents: + - stegentira_data \ No newline at end of file diff --git a/data-raw/stegentira_data.R b/data-raw/stegentira_data.R new file mode 100644 index 0000000..8d2393d --- /dev/null +++ b/data-raw/stegentira_data.R @@ -0,0 +1,10 @@ +## code to prepare `stegentira_data` dataset goes here + +# Define the path to the CSV file in inst/extdata +file_path <- system.file("extdata", "stegentira_data.csv", package = "appliedepidata") + +# Read in the CSV file using rio +stegentira_data <- rio::import(file_path) + +# Save the data as an internal .rda file in the data/ directory +usethis::use_data(stegentira_data, overwrite = TRUE) diff --git a/data/stegentira_data.rda b/data/stegentira_data.rda new file mode 100644 index 0000000..723d181 Binary files /dev/null and b/data/stegentira_data.rda differ diff --git a/inst/extdata/stegentira_data.csv b/inst/extdata/stegentira_data.csv new file mode 100644 index 0000000..05d882d --- /dev/null +++ b/inst/extdata/stegentira_data.csv @@ -0,0 +1,292 @@ +uniquekey,ill,dateonset,sex,age,tira,tportion,wmousse,dmousse,mportion,beer,redjelly,fruitsalad,tomato,mince,salmon,horseradish,chickenwin,roastbeef,pork +210,1,1998-06-27,1,18,1,3,0,1,1,0,0,0,0,0,0,0,0,0,1 +12,1,1998-06-27,0,57,1,1,0,1,1,0,0,1,0,1,1,1,0,0,0 +288,1,1998-06-27,1,56,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0 +186,1,1998-06-27,0,17,1,1,1,0,,0,1,0,0,0,9,0,0,0,9 +20,1,1998-06-27,1,19,1,2,0,0,0,1,0,0,0,0,0,0,0,0,0 +148,1,1998-06-27,0,16,1,2,1,1,1,0,0,1,0,1,1,0,1,0,0 
+201,1,1998-06-27,0,19,1,3,0,1,1,0,0,1,0,0,0,0,0,0,0 +106,1,1998-06-27,0,19,1,2,1,1,1,0,1,1,0,0,0,1,1,0,0 +272,1,1998-06-27,1,40,1,2,1,1,2,1,0,0,1,0,1,0,0,1,0 +50,1,1998-06-27,0,53,1,1,1,1,1,0,1,0,0,0,1,1,1,0,0 +216,1,1998-06-27,1,20,1,3,1,1,3,1,1,1,1,0,1,1,0,0,1 +141,1,1998-06-27,0,23,1,2,1,,1,0,1,1,0,1,0,1,0,0,1 +91,1,1998-06-27,0,17,1,1,0,1,1,1,1,0,1,1,0,0,1,0,1 +98,1,1998-06-27,1,19,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1 +200,1,1998-06-27,0,15,1,1,1,1,2,0,0,1,1,0,0,0,0,0,1 +109,1,1998-06-27,0,19,1,2,0,0,0,0,1,1,0,0,1,1,0,0,0 +117,1,1998-06-27,0,57,1,1,0,1,1,0,1,1,0,1,1,0,0,0,1 +281,1,1998-06-27,0,17,1,2,1,1,2,0,0,0,0,0,0,0,0,0,0 +269,1,1998-06-27,1,47,1,2,0,1,2,,1,0,1,0,1,0,1,0,1 +77,1,1998-06-27,1,16,1,2,0,0,0,0,1,1,0,0,0,0,0,0,0 +196,1,1998-06-27,0,17,1,1,1,1,1,0,1,0,0,1,1,1,0,0,0 +16,1,1998-06-27,1,19,1,2,0,0,0,0,0,0,1,0,0,0,0,0,1 +168,1,1998-06-27,0,17,1,2,1,1,2,0,0,1,0,0,1,0,0,0,0 +102,1,1998-06-27,0,17,1,2,1,1,2,0,0,1,0,1,0,1,0,0,1 +204,1,1998-06-27,0,18,1,3,0,1,1,0,0,0,1,0,0,0,0,0,0 +205,1,1998-06-27,1,18,1,3,0,1,3,1,0,0,0,0,1,0,1,0,1 +271,1,1998-06-27,0,29,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0 +48,1,1998-06-27,1,14,1,3,1,1,1,,0,0,0,0,0,0,1,0,1 +287,1,1998-06-27,0,13,1,2,1,1,,0,0,0,0,0,0,0,0,0,0 +25,1,1998-06-27,0,21,1,2,1,1,2,0,1,1,0,0,0,0,1,0,0 +15,1,1998-06-27,1,19,1,2,1,0,2,0,0,1,1,0,0,1,0,0,0 +45,1,1998-06-27,1,20,1,1,1,1,3,1,0,0,0,0,1,1,1,0,1 +125,1,1998-06-27,1,57,1,2,0,1,2,1,0,1,1,1,0,1,0,0,0 +113,1,1998-06-27,0,38,1,1,0,1,1,0,0,0,1,0,0,0,1,0,1 +284,1,1998-06-27,1,18,1,1,1,1,2,0,0,0,0,1,1,0,0,0,1 +121,1,1998-06-27,1,64,1,1,1,1,2,0,1,0,0,0,0,1,0,0,0 +52,1,1998-06-27,0,57,1,1,,1,,,0,0,0,0,0,1,0,0,0 +207,1,1998-06-27,0,27,1,1,1,1,1,0,0,1,1,0,1,1,0,0,0 +63,1,1998-06-27,1,23,1,1,1,0,1,0,1,1,1,0,1,1,1,0,1 +43,1,1998-06-27,0,21,1,2,1,0,1,0,0,1,0,0,0,0,0,0,0 +175,1,1998-06-27,0,21,1,2,0,0,0,1,0,1,1,1,0,0,0,0,1 +214,1,1998-06-27,1,20,1,2,1,1,1,1,1,0,0,0,1,1,1,0,1 +251,1,1998-06-27,0,20,1,2,0,1,1,0,1,1,1,0,1,1,0,1,1 +213,1,1998-06-27,0,18,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0 
+65,1,1998-06-27,1,24,1,2,1,1,2,0,0,1,0,1,0,0,0,0,1 +159,1,1998-06-27,1,24,1,3,0,1,3,1,1,0,0,0,0,0,0,1,1 +29,1,1998-06-27,0,19,1,1,1,1,2,0,0,1,0,0,0,0,0,0,1 +14,1,1998-06-28,1,58,1,1,0,1,,0,1,0,0,0,9,1,0,0,0 +165,1,1998-06-28,1,19,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0 +145,1,1998-06-28,1,18,1,1,0,1,2,1,0,0,1,0,0,0,1,0,1 +202,1,1998-06-28,0,27,1,1,0,1,2,0,1,1,0,0,1,1,0,0,0 +255,1,1998-06-28,0,20,1,2,0,1,2,0,1,0,0,0,0,0,0,0,0 +169,1,1998-06-28,0,19,1,2,1,1,1,0,1,1,1,0,0,0,0,0,0 +274,1,1998-06-28,1,54,1,2,0,1,1,1,0,1,0,1,0,0,0,1,1 +254,1,1998-06-28,1,23,1,2,0,0,0,1,0,1,1,1,1,0,0,1,1 +61,1,1998-06-28,1,18,1,1,1,1,2,1,0,0,1,1,1,1,1,0,1 +2,1,1998-06-28,0,16,1,2,1,1,2,0,1,0,1,0,1,1,1,0,0 +86,1,1998-06-28,0,26,1,1,0,1,3,1,0,0,0,1,0,0,0,0,1 +59,1,1998-06-28,1,16,1,2,1,1,2,1,1,0,0,0,0,0,1,0,1 +74,1,1998-06-28,0,14,1,2,0,1,2,0,0,0,1,0,0,0,0,0,0 +133,1,1998-06-28,0,20,1,2,1,1,2,0,0,1,1,0,0,0,0,0,1 +115,1,1998-06-28,0,56,1,1,1,1,1,0,1,0,0,0,0,0,1,0,0 +103,1,1998-06-28,0,46,1,3,0,1,1,0,0,1,1,1,0,1,1,0,1 +138,1,1998-06-28,1,18,0,0,1,1,3,1,0,1,0,0,0,0,0,0,0 +70,1,1998-06-28,1,19,1,2,,1,2,1,1,1,1,1,1,1,1,0,1 +173,1,1998-06-28,1,21,1,2,0,0,0,1,1,0,0,0,0,0,0,0,0 +144,1,1998-06-28,1,18,1,1,0,0,0,0,1,1,1,1,0,0,0,0,1 +212,1,1998-06-28,0,80,,,1,1,1,0,0,0,0,0,1,0,0,0,0 +234,1,1998-06-28,1,23,1,2,0,0,0,1,0,1,0,0,0,0,1,0,1 +156,1,1998-06-28,1,20,0,0,0,1,3,1,0,0,0,0,0,0,0,0,1 +146,1,1998-06-28,1,50,1,3,1,1,2,1,1,1,0,0,1,0,1,0,1 +152,1,1998-06-28,1,18,1,1,0,1,2,0,1,0,0,0,0,0,1,0,1 +31,1,1998-06-28,0,48,,,1,1,1,0,1,1,0,0,1,1,0,0,0 +279,1,1998-06-28,1,21,1,1,0,0,0,1,1,0,1,0,1,0,0,0,1 +36,1,1998-06-28,0,47,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0 +75,1,1998-06-28,0,18,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0 +286,1,1998-06-28,0,47,1,1,0,0,0,0,1,0,1,1,1,0,0,1,0 +215,1,1998-06-28,1,17,1,1,1,1,2,1,0,0,0,0,0,0,0,0,1 +56,1,1998-06-28,0,,1,1,0,0,0,0,1,1,0,1,1,0,1,0,1 +199,1,1998-06-28,1,52,1,1,,0,0,0,0,0,1,0,1,0,1,0,1 +154,1,1998-06-28,1,20,1,3,1,1,2,0,1,0,0,1,0,1,0,0,1 +27,1,1998-06-28,1,32,1,2,0,1,1,0,1,1,0,1,1,1,0,0,0 
+42,1,1998-06-28,0,17,1,1,1,1,1,0,0,0,0,0,1,0,1,0,0 +49,1,1998-06-28,0,16,1,3,1,1,1,,1,0,0,0,0,0,0,0,1 +96,1,1998-06-28,1,17,0,0,0,0,0,1,0,0,1,1,1,0,1,0,1 +66,1,1998-06-28,0,20,1,1,0,0,0,0,1,0,0,0,0,0,1,0,1 +104,1,1998-06-28,1,19,1,2,0,0,0,1,0,1,0,1,0,1,1,0,1 +13,1,1998-06-28,0,19,0,0,1,1,1,0,1,1,0,0,0,0,1,1,1 +221,1,1998-06-28,1,,1,1,1,1,1,0,0,0,0,0,1,0,0,0,1 +51,1,1998-06-28,0,19,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0 +177,1,1998-06-29,1,19,1,1,0,1,,0,0,0,0,1,0,0,0,0,0 +111,1,1998-06-29,1,19,1,1,0,1,2,1,0,0,0,0,1,1,0,0,0 +242,1,1998-06-29,0,19,1,2,1,1,,0,1,1,1,1,9,9,0,1,0 +143,1,1998-06-29,1,48,0,0,1,1,1,0,0,1,0,1,0,0,1,0,1 +278,1,1998-06-29,0,19,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 +62,1,1998-06-29,0,52,1,1,,1,1,0,0,1,0,0,0,0,1,0,0 +176,1,1998-06-29,1,19,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0 +134,1,1998-06-29,0,,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0 +256,0,,1,21,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1 +55,0,,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +235,0,1998-07-05,0,20,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0 +58,0,,1,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 +194,1,1998-06-27,0,39,0,0,1,1,2,0,0,1,1,0,0,0,0,0,0 +282,0,,0,17,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 +137,0,,0,22,0,0,1,1,2,1,1,1,0,0,1,0,0,0,0 +118,0,,1,13,0,0,0,0,0,,1,0,0,0,0,0,1,1,1 +220,0,,0,20,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0 +24,0,1998-07-02,1,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +187,0,,1,17,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1 +190,0,,1,17,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0 +189,0,,1,62,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0 +195,0,1998-06-28,0,17,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0 +231,0,,1,21,0,0,1,0,,,0,0,0,0,1,1,0,1,1 +239,0,,0,54,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +289,0,,0,20,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0 +184,0,,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +126,0,,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +209,0,,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +290,0,,1,22,,,,,,1,0,0,0,0,1,1,0,0,1 +67,0,1998-07-01,0,18,1,1,0,0,0,0,1,1,0,1,1,0,0,0,1 +170,0,,0,18,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +230,0,,1,,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0 +151,1,1998-06-28,1,19,1,2,1,1,2,1,0,0,0,1,1,0,0,0,0 +283,0,,1,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 
+211,0,,1,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +69,0,,0,57,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1 +35,0,,1,20,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1 +233,0,,1,20,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0 +208,0,1998-06-30,0,48,1,2,1,1,3,0,0,1,1,0,1,1,1,0,0 +155,0,,1,19,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1 +198,0,,0,44,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1 +40,0,,1,19,0,0,0,0,0,1,1,0,1,0,1,0,0,0,1 +119,0,,0,46,0,0,0,0,0,,0,0,0,0,1,1,1,0,1 +139,0,1998-06-28,0,51,1,1,,1,1,1,0,1,1,0,1,0,0,0,0 +180,0,,1,17,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1 +188,0,,1,16,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0 +157,0,,1,22,0,0,1,0,1,1,0,0,0,1,1,1,0,0,0 +80,0,,1,18,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0 +203,0,,0,19,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0 +280,0,,0,20,0,0,1,1,,1,1,1,1,0,0,0,1,0,1 +37,0,,1,47,0,0,0,0,0,,0,0,1,0,1,0,0,0,0 +193,0,,0,15,0,0,,,,0,1,1,0,1,1,1,0,0,0 +53,0,,1,19,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 +22,0,,0,20,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +85,0,,0,13,0,0,1,1,3,0,0,0,0,1,0,0,0,0,0 +232,0,,1,20,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1 +258,0,,0,21,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 +265,0,,1,20,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1 +54,0,,1,18,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 +237,0,1998-07-09,1,20,0,0,0,0,0,1,0,0,1,1,1,1,0,0,1 +266,0,,1,45,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1 +236,0,,1,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +88,0,1998-06-29,1,44,1,1,0,1,2,0,1,1,0,0,1,1,0,0,0 +10,0,,1,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +11,0,1998-07-02,1,22,0,0,1,1,2,,1,0,0,0,1,1,0,0,0 +174,0,,1,58,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0 +185,0,,1,17,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +161,0,,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +226,0,,0,,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 +273,0,,0,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +227,0,1998-07-06,0,20,1,1,0,1,2,1,0,0,1,0,0,0,0,0,0 +260,0,,1,,0,0,0,0,0,,0,0,1,1,1,1,0,1,0 +223,0,,1,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +107,0,,0,19,1,1,0,0,0,1,0,0,1,0,1,0,1,0,0 +183,0,,1,16,0,0,0,0,0,1,0,0,0,0,1,0,1,1,1 +250,0,,1,26,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1 +253,0,,1,20,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0 +44,0,,0,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +182,0,,0,17,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 
+228,0,,0,19,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 +285,0,,1,57,0,0,0,0,0,0,1,1,1,1,1,1,0,1,1 +83,0,,0,18,0,0,0,1,2,0,1,0,0,0,1,0,1,0,0 +248,0,1998-06-27,0,19,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1 +136,0,,0,17,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1 +172,0,,1,18,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0 +46,0,,0,18,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0 +114,0,,0,23,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 +166,0,,1,19,0,0,0,0,0,0,1,1,1,0,0,1,1,0,0 +164,0,1998-06-27,0,45,0,0,,1,2,0,0,1,1,1,0,1,0,0,0 +101,0,,1,15,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 +21,0,,0,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +158,0,,0,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +108,0,,0,25,0,0,1,1,2,0,1,0,1,0,1,0,1,0,0 +34,0,1998-06-28,0,20,0,0,0,0,0,,0,0,0,1,1,0,0,0,0 +276,0,,1,20,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0 +222,0,,1,20,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +130,0,,0,48,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0 +275,0,,0,23,,,0,0,0,0,1,1,1,0,0,0,0,0,0 +72,0,,1,17,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +218,0,1998-06-30,0,17,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +267,0,,0,46,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0 +76,0,,1,19,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0 +241,0,,1,21,0,0,0,0,0,,0,0,0,0,1,0,0,0,1 +171,0,,0,18,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0 +142,0,,1,24,0,0,0,0,0,1,0,0,0,0,1,1,0,1,1 +89,0,1998-06-27,1,18,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +105,0,,0,52,1,1,1,1,,0,1,0,1,0,1,0,1,0,1 +39,0,,1,50,0,0,0,1,2,1,1,1,0,0,0,0,0,0,1 +167,0,1998-06-28,1,23,1,1,1,1,1,1,0,1,1,1,1,1,0,0,1 +140,0,,1,55,1,1,0,0,0,1,0,1,0,0,1,1,0,0,1 +124,0,,0,19,0,0,1,1,2,0,0,1,0,0,1,1,1,1,0 +131,0,,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +17,0,,0,18,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 +73,0,,1,19,0,0,1,0,2,1,0,0,0,0,1,0,1,0,0 +97,0,,1,50,1,3,,1,2,0,1,1,0,1,0,0,1,0,1 +5,0,,0,49,0,0,0,0,0,,1,0,0,0,0,0,1,0,0 +123,0,,1,65,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1 +9,0,,1,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 +99,0,,1,43,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 +84,0,,0,43,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0 +229,0,1998-06-26,0,20,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 +116,0,,1,59,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1 +238,1,1998-06-28,0,21,1,1,,1,1,0,1,0,0,0,0,0,0,0,0 +217,0,1998-06-27,1,19,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1 
+122,0,,0,20,0,0,0,0,0,0,0,0,1,1,1,0,1,0,1 +240,0,,1,56,1,1,1,1,2,1,1,1,0,1,0,0,0,0,1 +95,0,,0,14,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0 +110,0,,0,55,0,0,0,0,0,0,1,1,1,1,0,0,0,1,0 +41,0,1998-06-27,0,47,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0 +206,0,,0,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +23,0,,0,42,1,1,0,1,1,0,1,0,0,1,0,0,1,0,1 +257,0,,0,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +163,0,1998-06-27,1,49,0,0,0,1,1,0,0,0,0,0,1,1,0,0,1 +64,0,1998-07-01,0,45,1,1,,1,2,0,1,1,0,1,9,9,1,0,9 +100,0,,0,42,0,0,0,0,0,0,0,0,1,0,1,1,0,0,1 +120,0,,0,60,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 +160,0,,1,16,0,0,1,1,1,0,0,1,0,1,0,0,0,0,1 +224,0,,1,20,1,3,0,0,0,1,0,0,0,0,0,0,1,0,0 +94,0,,0,18,1,2,,1,1,0,0,1,0,0,0,0,0,0,0 +60,0,,1,18,0,0,0,0,0,1,0,0,0,1,0,0,1,1,1 +263,0,,1,21,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +191,0,,1,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +147,0,,0,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +179,0,,0,17,0,0,1,1,2,0,0,0,0,1,0,0,0,0,1 +93,0,,0,17,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0 +57,0,,0,18,1,1,1,1,3,0,1,1,1,0,0,0,0,0,0 +112,0,,1,45,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0 +268,0,,0,58,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1 +243,0,,1,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +219,0,,1,24,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +247,0,,1,57,0,0,0,0,0,1,1,1,1,1,1,0,1,1,1 +38,0,,1,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +33,0,,1,20,0,0,0,0,0,1,0,1,0,0,1,0,1,0,1 +68,1,,1,56,1,3,1,1,2,0,1,1,1,0,0,0,1,0,0 +4,0,,1,46,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1 +150,1,1998-06-28,1,18,1,2,1,1,1,0,1,1,1,1,1,1,0,0,0 +79,0,1998-07-02,0,17,0,0,1,,1,0,0,0,1,1,0,0,0,0,0 +178,0,,1,37,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1 +127,0,,1,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +153,0,,1,19,0,0,0,0,0,1,0,0,0,1,0,1,1,1,0 +261,0,,1,21,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0 +92,0,1998-06-27,1,19,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1 +90,0,,0,18,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 +264,0,,1,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +277,0,,1,53,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0 +181,0,,1,17,1,2,0,1,2,1,1,0,1,1,0,0,1,0,1 +197,0,,1,19,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +225,0,,0,19,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 +18,0,,1,19,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1 
+1,0,,0,17,1,2,1,1,1,0,1,0,0,0,0,0,0,0,0 +252,0,,1,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +47,0,,0,18,0,0,,1,3,0,1,0,0,0,0,0,1,0,0 +245,0,,1,24,0,0,0,0,0,1,0,0,0,1,1,1,1,0,1 +78,0,1998-06-29,0,15,1,1,0,1,2,0,1,0,0,1,1,1,0,0,1 +162,0,1998-07-01,0,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +81,0,1998-06-27,1,16,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1 +3,0,,0,48,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0 +82,0,1998-06-28,1,18,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +32,0,,1,21,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 +71,0,,1,59,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0 +30,0,,1,57,,,1,1,2,1,1,1,0,1,1,1,0,0,1 +28,0,1998-07-04,0,15,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0 +135,0,,1,,1,1,0,1,1,1,1,0,1,0,0,0,0,0,1 +246,0,,1,20,0,0,0,0,0,1,0,0,1,1,1,0,0,1,1 +149,0,,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +7,0,,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +19,0,,1,18,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0 +249,0,,0,,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0 +128,0,,0,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +6,0,,1,20,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1 +192,0,,1,16,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 +270,0,,1,51,0,0,0,0,0,0,0,0,1,1,1,0,1,0,0 +262,0,,1,18,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 +259,0,,0,21,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0 +87,0,,1,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +8,0,,1,18,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1 +129,0,,1,18,0,0,,1,1,0,0,0,1,0,1,0,0,1,1 +26,0,1998-06-30,1,21,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0 +132,0,,0,17,0,0,0,0,0,,0,0,0,0,0,0,0,0,0 +244,0,,1,21,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1 +291,0,,0,22,1,1,1,1,,,1,0,0,1,1,0,0,0,1 diff --git a/inst/extdata/tableoftables.xlsx b/inst/extdata/tableoftables.xlsx index 5c2f7e0..0b0b6f5 100644 Binary files a/inst/extdata/tableoftables.xlsx and b/inst/extdata/tableoftables.xlsx differ diff --git a/man/stegentira_data.Rd b/man/stegentira_data.Rd new file mode 100644 index 0000000..5483697 --- /dev/null +++ b/man/stegentira_data.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/salmonella_outbreak_de_1998_doc.R +\docType{data} +\name{stegentira_data} +\alias{stegentira_data} +\title{Linelist for fictionalized salmonella 
outbreak in Stegen, Germany} +\format{ +Linelist with 291 rows and 20 columns +} +\source{ +This case study was first designed by Alain Moren and Gilles Desve for EPIET. It is based on an investigation conducted by Anja Hauri, RKI, Berlin, 1998. +Data is fictional and was inspired by Nygren et al. Tick-borne encephalitis: acute clinical manifestations and severity in 581 cases from Germany, 2018-2020. Journal of Infection. 2023 Apr 1;86(4):369-75 + +This is version 1.0 (for the appliedepidata package. Earlier data versions used by EPIET are not considered) + +This dataset is English. + +This dataset is licensed under \href{https://www.gnu.org/licenses/gpl-3.0.html}{GPL3} +} +\usage{ +stegentira_data +} +\description{ +Linelist data for fictionalized salmonella outbreak that occured in a highschool +in Stegen, Germany (1998) +} +\details{ +This is a fake case linelist with individual level data +on clinical and epidemiological information, with a focus on foods +eaten at a cafeteria (exposures) +} +\keyword{datasets} diff --git a/vignettes/adding-data.Rmd b/vignettes/adding-data.Rmd index a39c6e8..06c7c7f 100644 --- a/vignettes/adding-data.Rmd +++ b/vignettes/adding-data.Rmd @@ -3,8 +3,11 @@ title: "Adding data" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Adding data} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 --- ```{r, include = FALSE} @@ -19,40 +22,22 @@ knitr::opts_chunk$set( library(appliedepidata) ``` - - # Guide to contributing new datasets -## Package folder structure - -To below is a simplified explanation of the -[R packages data chapter](https://r-pkgs.org/data.html). For a fuller understanding, -read that chapter. - +## PACKAGE FOLDER STRUCTURE -- The following package folders are important: - - **data**: R datasets go in `data` folder - - **inst/extdata**: Non-R datasets go in `inst>extdata` folder. 
- - **internal data**: When you build a package, the `Rda` datasets - (from `data` folder) can become "internal" (more efficient for file storage). - These are accessed by calling `package::dataset` (e.g. - `appliedepidata::AJS_AmTiman`. They can also be imported directly from github - using link to the file in data folder e.g. 'rio()', or the - `appliedepidata::get_data` or `appliedepidata::save_data` functions. - - **data-raw**: Contains R scripts used for creating the - exported or internal data (e.g. if you have edited a dataset or used {usethis} - to internalise the dataset) - - **sysdata**: Not relevant for current package setup. In some setups you are - supposed to put *tableoftables* in sysdata (i.e. just for package usage). - However for our current setup leave in *extdata*. +### Overview +The diagram and descriptions below are a simplified explanation of the [R packages data +chapter](https://r-pkgs.org/data.html). For a fuller understanding, read +that chapter. -```bash +``` bash . ├── appliedepidata.Rproj ├── _pkgdown.yml ├── data -│ └── newdata.rda +│ └── newdata.rda (note: internalised) ├── data-raw │ └── newdata.R ├── inst @@ -65,145 +50,251 @@ read that chapter. └── newdata.Rd ``` +### Folders you will edit + +- **inst/extdata**: Newly added datasets go in here in their original + filetype, e.g. .xlsx, .csv, but also .rds files. These files will be + downloaded with the `save_data()` function.\ +- **data-raw**: Contains R scripts used for processing and + internalizing datasets (making them more efficient for storage), + i.e. for turning newly added datasets in `inst/extdata` to .rda + files +- **R**: Contains R scripts in Roxygen2 format which define the + functions and datasets. When run this creates the R documentation + files (.Rd files) which are saved in the **man** folder. Multiple + datasets can be defined in one script, which is sensible as some are + grouped together (e.g. 
part of the same outbreak) +- **vignettes**: Contains detailed instructions on how to use + particular functions. They complement the function-level + documentation in the **man** folder by giving broader explanation. + +### Folders you will not edit + +These contain files outputted by running the code in the folders above. + +- **data**: Contains internalised and processed R datasets (.rda + files). These are created when internalizing the data (running code in data-raw folder), which makes them more efficient for file storage. +- **man**: Contains the R documentation files for package functions + and datasets (created when running the code in the **R** folder). + Each .Rd file has information about a specific function or dataset. +- **tests**: Contains test files for developer use only. + + +## ADDING DATA + +This describes the process for adding a file to the repo. Note that any original datasets need to be added to the inst/extdata folder, even if already in .rda format. + +If you are adding a dataset from an existing R package, you can skip to +step 3 below. + +1. **Name your file appropriately** + + A. You can name it whatever you want, but stick to basic naming + conventions. + + B. Ensure that there is not already a file in *tableoftables.xlsx* + named the same. + + C. Avoid generic names like: `linelist_cleaned.xlsx` or + `survey_data.xlsx`. + + D. Use consistent and descriptive names without spaces (e.g., + `AJS_AmTiman`, `sitrep_mortality_survey`). + + E. Name files from the same group (e.g. from the same outbreak or + for the same case study) with the same prefix. E.g: + `examplename_data` and `examplename_population` for a case linelist + and corresponding denominator table respectively. + +2. **Place your file in the correct folder** + + A. Add into `inst/extdata` folder. This is what will be downloaded when running + the save_data() function. If a shapefile, zip first. -## Adding a file -This describes the process for adding a file to the repo. 
Note that the processes -for adding a non-R file (any file that is not `.rda`) and an R file (any file -already in`.rda` format) are slightly different. -If you are adding a dataset from an existing R package, you can skip to step 3 -below. - -1. Name your file appropriately - a. You can name it whatever you want, but stick to basic naming conventions. - b. Ensure that there is not already file in *tableoftables.xlsx* named the same. - c. Avoid generic names like: `linelist_cleaned.xlsx` or `survey_data.xlsx`. - d. Use consistent and descriptive names without spaces (e.g., `AJS_AmTiman`, - `sitrep_mortality_survey`). -2. Place your file in the correct folder - a. A *non-R* file (e.g. `xlsx`, `shp`, `zip`) goes in `inst/extdata` folder - i. If adding a shapefile then zip it - b. An *R* file (e.g. `rda`, `rds`) goes in `data` folder -3. Reproducibly edit dataset and internalise (see `data-raw/AJS_AmTiman.R` for example) - a. In your console run `usethis::use_data_raw()` - b. This creates an R script in the `data-raw` folder. - c. Read in the file by defining the path with `system.file`. - i. If you are editing a file already in the package (e.g. shortening the - Ebola linelist for a course), make sure you read in the original dataset here. - Document this properly with {roxygen} and in the metadata as described - below. - d. Make any edits necessary to your dataset in a reproducible way. - e. Save and internalise the dataset with `usethis::usedata()`. -4. Add documentation for each dataset added - a. This is done in an R script in the `R` folder. - b. Name the script something that will allow reviewers to find it (e.g. `AJS_chad`) and suffix with `_doc` so that it can be differentiated from functions. - c. Place all the documentation for datasets in that group within the same script. - d. Ensure to clearly document the source and license for the dataset. - e. 
Add in an explanation for each variable, if you have a data dictionary you - use [appliedepidata::create_desc()](https://appliedepi.github.io/appliedepidata/reference/create_desc.html) to help with this. - i. You could also create a data dictionary for use with this function, see - the [data dictionary walk-through]((https://appliedepi.github.io/appliedepidata/articles/data-dictionaries.html).) -5. Add the datasets to `_pkgdown.yml` - a. Group relevant datasets under the same subtitle (suffix with the language) - b. The names here correspond to the name in quotations at the end of your - description file from point 4 above, as well as the name of the file (without - file extension). -6. Add the dataset to the `tablesoftables.xlsx` as described below. +3. **Build the package with the added data** + A. Press Ctrl + Shift + B to build the package with the newly added + data. This will mean the data is recognized for the next step. + + + +4. **Reproducibly edit dataset and internalize** (see + `data-raw/AJS_AmTiman.R` for example) + + A. In your console run + `usethis::use_data_raw("")`. + + B. This creates an R script in the `data-raw` folder. It will + already contain a comment at the top saying "code to prepare + data goes here" and a `usethis::use_data()` function + to internalise the dataset (i.e. to produce the rda file) + + C. Edit the R script to correctly read in the file using + `system.file()`, as below. Necessary edits to the dataset should + also go here. Then run this script to internalise the data, and + close. + + D. NOTE that if you want to process the raw data to create a dataset that should also be accessible to users (e.g. via the `get_data()` or `save_data()` functions), make sure that the processed data also gets saved into inst/extdata as version 2. Make sure you internalise (with use_data) both versions of the data. 
+ +```{r, eval=FALSE} +## code to prepare `examplename_linelist` dataset goes here + +# Define the path to the Excel file in inst/extdata +file_path <- system.file("extdata", "", package = "appliedepidata") + +# Read in the Excel file using rio +examplename_linelist <- rio::import(file_path) + +# Other code for editing the file can go here + + + + +# Save the data as an internal .rda file in the data/ directory +usethis::use_data(examplename_linelist, overwrite = TRUE) + + +``` + +5. **Add the dataset to the `tableoftables.xlsx` as described below.** + +6. **Add documentation for each dataset added** + + A. Create a new R script in the `R` folder, with one file for all + datasets in a group. The file name should be the group name + generated from tableoftables (column P), with suffix '\_doc'. Note + that the easiest approach is to copy and edit an existing R script in the folder. + + B. Ensure to clearly document the source and license for the + dataset. + + C. Ensure to put the correct name of the dataset at the bottom of + each dataset description (under @docType). E.g. + `examplename_linelist`. -# Defining dataset metadata (adding to `tablesoftables.xlsx`) + D. Run devtools::document() to create the actual R documentation. + This will be an .Rd file in the `man` folder, with file name + corresponding with the dataset name (e.g. `examplename_linelist.Rd`) -Below is a table explaining how to fill in each variable in the dataset -metadata Excel sheet (`tablesoftables.xlsx`). This guide helps ensure -consistency and completeness when adding new datasets to your collection. -- **name**: The filename of the dataset as it appears in the `inst/extdata` - directory, **without** the file extension. This should be unique within - the dataset group, and ideally also within the *tableoftables* - (i.e. avoid generic names like: `linelist_cleaned.xlsx` or `survey_data.xlsx`). - Use consistent and descriptive names without spaces (e.g., `AJS_AmTiman`, - `mortality_survey`). 
-- **type**: The category or type of the dataset (e.g., `linelist`, - `population`, `shape`, `survey`, `dictionary`). +7. **Add the datasets to `_pkgdown.yml`** -- **extension**: The file extension (e.g., `xlsx`, `zip`). + A. Subtitle: Describe the group of linelists. State the year in + brackets and language at the end -- **type_version**: Used to identify the *original* dataset and its associated - child data. Increment when format or variables change. If there are multiple - linelists in one group, this would increment with the type. + B. Contents: List the datasets. Again, the names here correspond to + the name of the dataset without extension, e.g + "examplename_linelist". -- **data_version**: Used to identify the *original* dataset and its associated - child data. Increment when format or variables change. Ensure you document - changes in the appropriate 'data-raw' file. + C. Make sure to have correct indentation and use of dashes. See + prior examples in the file. -- **language**: Language code using [ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g., `en`, `fr`). +## ADDING DATASET METADATA (adding to `tableoftables.xlsx`) -- **country**: Country code using [ISO 3166-1 alpha-3 codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) (e.g., `tcd`). +Below is a table explaining how to fill in each variable in the dataset +metadata Excel sheet (`tableoftables.xlsx`). This guide helps ensure +consistency and completeness when adding new datasets to your +collection. -- **scale**: Geographic scale (e.g., `subnational`, `national`, or `international`). +- **name**: The filename of the dataset as it appears in the + `inst/extdata` directory, **without** the file extension. This + should be unique within the dataset group, and ideally also within + the *tableoftables* (i.e. avoid generic names like: + `linelist_cleaned.xlsx` or `survey_data.xlsx`). 
Use consistent and + descriptive names without spaces (e.g., `AJS_AmTiman`, + `mortality_survey`). -- **subject**: Main subject of the dataset (e.g., `acute jaundice syndrome`). +- **type**: The category or type of the dataset (e.g., `linelist`, + `population`, `shape`, `survey`, `dictionary`). -- **context**: Context of the data (e.g., `outbreak`, `survey`). +- **extension**: The file extension (e.g., `xlsx`, `zip`). -- **fictional**: Is the dataset fictional (`yes`) or real (`no`)? +- **type_version**: Used to identify the *original* dataset and its + associated child data. Increment when format or variables change. If + there are multiple linelists in one group, this would increment with + the type. -- **year**: Year the data was collected (e.g., `2016`). This is the *earliest* - year in the dataset. +- **data_version**: Used to identify the *original* dataset and its + associated child data. Increment when format or variables change. + Ensure you document changes in the appropriate 'data-raw' file. -- **description**: Brief description of the dataset. Ideally, copy from - roxygen documentation. +- **language**: Language code using [ISO 639-1 + codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g., + `en`, `fr`). -- **usage**: Intended usage (e.g., `{sitrep} walkthroughs`, `training`). +- **country**: Country code using [ISO 3166-1 alpha-3 + codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) (e.g., + `tcd`). -- **license**: License for dataset (e.g., `gpl3`, `mit`). +- **scale**: Geographic scale (e.g., `subnational`, `national`, or + `international`). -- **group_identifier**: *DO NOT EDIT* - Created by concatinating function in excel. - High-level identifier combining `subject`, `context`, `country`, and `year` - (e.g.,`acute_jaundice_syndrome_outbreak_tcd_2016`). +- **subject**: Main subject of the dataset (e.g., + `acute jaundice syndrome`). 
-- **unique_identifier**: *DO NOT EDIT* - Combines `group_identifier`, `type`, - `type_version`, `data_version`, `context`, and `year` to create a unique - identifier (e.g. `acute_jaundice_syndrome_outbreak_tcd_2016_linelist_1`). +- **context**: Context of the data (e.g., `outbreak`, `survey`). +- **fictional**: Is the dataset fictional (`yes`) or real (`no`)? -For example, when adding an Ebola dataset, you would enter the information as -shown below. The original dataset (whether it's from {outbreaks} or another source) -would be considered `type_version` 1. If it’s the only linelist in its group, it -remains `type_version` 1. If a completely different linelist is added (not just an -edited version), increment the `type_version` accordingly. +- **year**: Year the data was collected (e.g., `2016`). This is the + *earliest* year in the dataset (even if fictional). -For any changes to the data (such as cleaning or changing nums of rows or columns), -increment the `data_version` (e.g., `data_version` 2), but the `type_version` -remains the same to indicate that it’s a derivative (or "child") of the original. -Each child dataset gets its own entry. +- **description**: Brief description of the dataset. Ideally, copy + from roxygen documentation. -If a dataset is translated into a different language, create a new entry for -the translated version while keeping the `data_version` and `type_version` the same, -but editing the `language` column accordingly. -This ensures you can trace back the parent-child relationship between datasets. +- **usage**: Intended usage (e.g., `{sitrep} walkthroughs`, + `training`). +- **license**: License for dataset (e.g., `gpl3`, `mit`). 
-| **Variable** | **Example Entry** | -|-----------------------|------------------------------------| -| **name** | `ebola_linelist_cleaned` | -| **type** | `linelist` | -| **extension** | `xlsx` | -| **type_version** | `1` | -| **data_version** | `1` | -| **language** | `en` | -| **country** | `lbr` | -| **scale** | `national` | -| **subject** | `ebola` | -| **context** | `outbreak` | -| **fictional** | `yes` | -| **year** | `2014` | -| **description** | Linelist data from the Ebola virus | -| | disease outbreak in Liberia in | -| | 2014. | -| **usage** | `introexercises`, etc. | -| **license** | `gpl3` | -| **group_identifier** | `ebola_outbreak_lbr_2014` | -| **unique_identifier** | `ebola_outbreak_lbr_2014_linelist_1_1_outbreak_2014`| +- **group_identifier**: *DO NOT EDIT* - Created by a concatenation + function in Excel. High-level identifier combining `subject`, + `context`, `country`, and `year` + (e.g., `acute_jaundice_syndrome_outbreak_tcd_2016`). +- **unique_identifier**: *DO NOT EDIT* - Combines `group_identifier`, + `type`, `type_version`, `data_version`, `context`, and `year` to + create a unique identifier (e.g. + `acute_jaundice_syndrome_outbreak_tcd_2016_linelist_1`). + +For example, when adding an Ebola dataset, you would enter the +information as shown below. The original dataset (whether it's from +{outbreaks} or another source) would be considered `type_version` 1. If +it's the only linelist in its group, it remains `type_version` 1. If a +completely different linelist is added (not just an edited version), +increment the `type_version` accordingly. + +For any changes to the data (such as cleaning or changing the number of rows +or columns), increment the `data_version` (e.g., `data_version` 2), but +the `type_version` remains the same to indicate that it's a derivative +(or "child") of the original. Each child dataset gets its own entry.
+ +If a dataset is translated into a different language, create a new entry +for the translated version while keeping the `data_version` and +`type_version` the same, but editing the `language` column accordingly. +This ensures you can trace back the parent-child relationship between +datasets. + +| **Variable** | **Example Entry** | +|----------------------------|--------------------------------------------| +| **name** | `ebola_linelist_cleaned` | +| **type** | `linelist` | +| **extension** | `xlsx` | +| **type_version** | `1` | +| **data_version** | `1` | +| **language** | `en` | +| **country** | `lbr` | +| **scale** | `national` | +| **subject** | `ebola` | +| **context** | `outbreak` | +| **fictional** | `yes` | +| **year** | `2014` | +| **description** | Linelist data from the Ebola virus | +| | disease outbreak in Liberia in | +| | 2014\. | +| **usage** | `introexercises`, etc. | +| **license** | `gpl3` | +| **group_identifier** | `ebola_outbreak_lbr_2014` | +| **unique_identifier** | `ebola_outbreak_lbr_2014_linelist_1_1_outbreak_2014` |