diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 0000000..e75dd49 --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,4 @@ +{ + "projectName": "NHSRdatasets", + "projectOwner": "nhs-r-community" +} diff --git a/DESCRIPTION b/DESCRIPTION index ef5d69d..b49fc64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,11 +2,12 @@ Package: NHSRdatasets Type: Package Title: NHS and Healthcare-Related Data for Education and Training Date: 2022-11-08 -Version: 0.3.2 +Version: 0.3.3 Authors@R: c( - person("Gary", "Hutson", , "g.hutson@nhs.net", "aut", comment = c(ORCID="0000-0003-3534-6143")), + person("Gary", "Hutson", , , "aut", comment = c(ORCID="0000-0003-3534-6143")), person("Tom", "Jemmett", ,"thomas.jemmett@nhs.net", "aut", comment = c(ORCID="0000-0002-6943-2990")), person("Chris", "Mainey", ,"c.mainey1@nhs.net", c("aut", "cre"), comment = c(ORCID ="0000-0002-3018-6171")), + person("Fran", "Barton", ,"fbarton@alwaysdata.net", "aut", comment = c(ORCID = "0000-0002-5650-1176")), person("Zoë", "Turner", role = "aut", email = "zoe.turner3@nhs.net", comment = c(ORCID = "0000-0003-1033-9158")), person("NHS-R community", role = "cph") ) @@ -16,11 +17,9 @@ License: CC0 Language: en-GB Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Depends: R (>= 3.5.0) BugReports: https://github.com/nhs-r-community/NHSRdatasets/issues -Imports: - tibble Suggests: caret, dplyr, @@ -29,9 +28,7 @@ Suggests: ggplot2, ggrepel, httr2, - janitor, knitr, - labelled, lattice, lme4, lmtest, @@ -39,14 +36,13 @@ Suggests: magrittr, MASS, ModelMetrics, - openxlsx2, rcmdcheck, readr, rmarkdown, rsample, scales, - stringi, synthpop, + tibble, tidyr, varhandle VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index 72731ab..deb6c7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,23 +1,40 @@ +# NHSRdatasets 0.3.3 + +- Moved the vignette for how ONS provisionally recorded deaths to an R script +available in the data-raw folder. 
+This is also available on the Quarto NHS-R Community website and +[GitHub](https://github.com/nhs-r-community/nhs-r-community/blob/main/blog/building-the-ons-mortality-dataset.qmd) +- Noted in this version the previous addition of the dataset from AphA (with kind +permission) from their CPD Survey, thanks to Fran Barton. Code detailing how the +data was extracted using the httr2 package and tidied can be found in the +[data-raw folder](https://github.com/nhs-r-community/NHSRdatasets/blob/main/data-raw/apha_cpd_survey.R). +- Noted in this version the previous addition of the dataset from European +Centre for Disease Prevention and Control for reported COVID19 infections and +deaths by day and country collected on the 14th December 2020, thanks to Chris +Mainey. + # NHSRdatasets 0.3.0 -- Added two new datasets for 'stranded patients' and synthetic early warning scores (NEWS), including vignettes, thanks to Gary Hutson. +- Added two new datasets for 'stranded patients' and synthetic early warning +scores (NEWS), including vignettes, thanks to Gary Hutson. - Removed Travis and added GitHub actions # NHSRdatasets 0.2.0 -- Added a new ONS Mortality data set, and vignette showing it's construction, thanks to Zoë Turner. +- Added a new ONS Mortality data set, and vignette showing its construction, +thanks to Zoë Turner. - Resaved .Rdata files as rda, using "gzip" compression. - Other minor documentation tweaks. - # NHSRdatasets 0.1.2 -- Added a new NHS Accident and Emergency (A&E) dataset with vignette, thanks to Tom Jemmett. +- Added a new NHS Accident and Emergency (A&E) dataset with vignette, thanks to +Tom Jemmett. - Typos resolved and cleaned some files. - Added pkgdown site. - # NHSRdatasets 0.1.1 -- This is the first release of this collaborative package for NHS and healthcare analysts to learn or teach R. +- This is the first release of this collaborative package for NHS and healthcare +analysts to learn or teach R. 
It will evolve over time as new contributions are released. diff --git a/R/LOS_model.R b/R/LOS_model.R index cca8470..785fd14 100644 --- a/R/LOS_model.R +++ b/R/LOS_model.R @@ -10,7 +10,7 @@ #' @format Data frame with five columns #' \describe{ #' \item{ID}{A fictional patient ID number} -#' \item{Organisation}{A factor representing one of ten fictional hospital trusts, e.g. Trust1} +#' \item{Organisation}{A factor representing one of ten fictional hospital trusts, for example Trust1} #' \item{Age}{Age in years of each fictional patient} #' \item{LOS}{In-hospital length of stay in days. The difference between admission and discharge date in dates} #' \item{Death}{Binary for death status: 0 = survived, 1= died in hospital} @@ -23,12 +23,11 @@ #' @examples #' data(LOS_model) #' -#' model1 <- glm(Death ~ Age + LOS, data=LOS_model, family="binomial") +#' model1 <- glm(Death ~ Age + LOS, data = LOS_model, family = "binomial") #' summary(model1) #' #' # Now with an Age, LOS, and Age*LOS interaction. 
-#' model2<- glm(Death ~ Age * LOS, data=LOS_model, family="binomial") +#' model2 <- glm(Death ~ Age * LOS, data = LOS_model, family = "binomial") #' summary(model2) #' "LOS_model" - diff --git a/R/ae_attendances.R b/R/ae_attendances.R index b5551c9..f7199b3 100644 --- a/R/ae_attendances.R +++ b/R/ae_attendances.R @@ -51,9 +51,11 @@ #' filter(any(type == "1")) %>% #' summarise_at(vars(attendances, breaches), sum) %>% #' arrange(desc(attendances)) %>% -#' mutate(performance = 1 - breaches / attendances, -#' overall_performance = 1 - sum(breaches) / sum(attendances), -#' rank = rank(-performance, ties.method = "first") / n()) %>% +#' mutate( +#' performance = 1 - breaches / attendances, +#' overall_performance = 1 - sum(breaches) / sum(attendances), +#' rank = rank(-performance, ties.method = "first") / n() +#' ) %>% #' ggplot(aes(rank, performance)) + #' geom_vline(xintercept = c(0.25, 0.5, 0.75), linetype = "dotted") + #' geom_hline(yintercept = 0.95, colour = "red") + @@ -61,11 +63,14 @@ #' geom_point() + #' scale_y_continuous(labels = percent) + #' theme_minimal() + -#' theme(panel.grid = element_blank(), -#' axis.text.x = element_blank()) + -#' labs(title = "4 Hour performance by trust", -#' subtitle = "Apr-16 through Mar-19", -#' x = "", y = "") +#' theme( +#' panel.grid = element_blank(), +#' axis.text.x = element_blank() +#' ) + +#' labs( +#' title = "4 Hour performance by trust", +#' subtitle = "Apr-16 through Mar-19", +#' x = "", y = "" +#' ) #' "ae_attendances" - diff --git a/R/apha_cpd_survey.R b/R/apha_cpd_survey.R index 4b8487f..d0d7131 100644 --- a/R/apha_cpd_survey.R +++ b/R/apha_cpd_survey.R @@ -1,21 +1,21 @@ #' AphA (Association of Professional Healthcare Analysts) CPD Survey Responses -#' +#' #' Full raw data from the AphA CPD Survey -#' +#' #' @source \url{https://www.aphanalysts.org/documents/cpd-survey-results-raw-data/} -#' +#' #' The survey of NHS and other healthcare data analysts was conducted in July #' 2022. 
The results data is made available in this package with the permission #' of AphA. -#' +#' #' @format This tidied raw data is available here as a tibble with 38 columns #' (blank or superfluous columns from the raw data were removed) and 237 rows #' (1 per respondent ID). -#' +#' #' Variables have been named using a "controlled language" approach informed #' by Emily Riederer's "Column Names as Contracts" #' \url{https://emilyriederer.netlify.app/post/column-name-contracts/}. -#' +#' #' \describe{ #' \item{*_id}{Columns ending in \code{"_id"} are numeric and represent a #' unique ID for that response.} @@ -33,20 +33,20 @@ #' \item{*_txt}{Columns ending in \code{"_txt"} contain free text responses and #' are in character format.} #' } -#' +#' #' Multi-part questions have column name stubs with sequential letters. For #' example, \code{"q20a_"}, \code{"q20b_"} and so on. #' For formatting consistency, questions with a single part still have a #' column name stub with the letter a, for example \code{"q01a_"}. -#' +#' #' Original survey questions (lightly edited) are provided as variable labels #' using the \code{{labelled}} package #' \url{https://larmarange.github.io/labelled/}. #' These labels provide more descriptive context for the "clean" column names. #' Variable labels can be viewed using \code{labelled::get_variable_labels #' (apha_cpd_survey)}. -#' -#' +#' +#' #' Survey press release web page: \url{https://www.aphanalysts.org/ltnws/nhs-at-risk-of-losing-a-generation-of-data-analysts/} -#' +#' "apha_cpd_survey" diff --git a/R/covid19.R b/R/covid19.R index db3f9d0..7abd2b6 100644 --- a/R/covid19.R +++ b/R/covid19.R @@ -1,11 +1,15 @@ #' International COVID-19 reported infection and death data #' -#' Reported COVID-19 infections, and deaths, collected and collated by the European Centre for Disease Prevention -#' and Control (ECDC, provided by day and country. 
-#' Data were collated and published up to 14th December 2020, and have been tidied so they are easily usable within the `tidyverse` of packages. +#' Reported COVID-19 infections, and deaths, collected and collated by the +#' European Centre for Disease Prevention and Control (ECDC), provided by day +#' and country. Data were collated and published up to 14th December 2020, +#' and have been tidied so they are easily usable within the `tidyverse` of +#' packages. #' #' Data sourced from \href{https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide}{European Centre for Disease Prevention and Control} -#' which is available under the open licence, compatible with the CC BY 4.0 license, further details available at \href{https://www.ecdc.europa.eu/en/copyright}{ECDC}. +#' which is available under the open licence, compatible with the CC BY 4.0 +#' license, further details available at +#' \href{https://www.ecdc.europa.eu/en/copyright}{ECDC}. 
#' #' @docType data #' @@ -14,10 +18,14 @@ #' @format Tibble with seven columns #' \describe{ #' \item{date_reported}{The date cases were reported} -#' \item{contient}{A `factor` for the geographical continent in which the reporting country is located.} -#' \item{countries_and_territories}{A `factor` for the country or territory reporting the data.} -#' \item{countries_territory_code}{A `factor` for the a three-letter country or territory code.} -#' \item{population_2019}{The reported population of the country for 2019, taken from Eurostat for Europe and the World Bank for the rest of the world.} +#' \item{contient}{A `factor` for the geographical continent in which the +#' reporting country is located.} +#' \item{countries_and_territories}{A `factor` for the country or territory +#' reporting the data.} +#' \item{countries_territory_code}{A `factor` for a three-letter country +#' or territory code.} +#' \item{population_2019}{The reported population of the country for 2019, +#' taken from Eurostat for Europe and the World Bank for the rest of the world.} #' \item{cases}{The reported number of positive cases.} #' \item{deaths}{The reported number of deaths.} #' } @@ -36,13 +44,14 @@ #' # Create a plot of the performance for England over time #' covid19 %>% #' filter(countries_and_territories == -#' c( "United_Kingdom", "Italy", "France", "Germany", "Spain")) %>% -#' ggplot(aes(x=date_reported, y= cases, col=countries_and_territories)) + -#' geom_line()+ +#' c("United_Kingdom", "Italy", "France", "Germany", "Spain")) %>% +#' ggplot(aes(x = date_reported, y = cases, col = countries_and_territories)) + +#' geom_line() + #' scale_color_discrete("Country") + -#' scale_y_continuous(labels=comma)+ -#' labs(y="Cases", x="Date", title="Covid-19 cases for selected countries" -#' , alt="A plot of covid-19 cases in France, Germany, Italy, Spain & the UK")+ +#' scale_y_continuous(labels = comma) + +#' labs( +#' y = "Cases", x = "Date", title = "Covid-19 cases for selected 
countries", +#' alt = "A plot of covid-19 cases in France, Germany, Italy, Spain & the UK" +#' ) + #' theme_minimal() "covid19" - diff --git a/R/ons_mortality.R b/R/ons_mortality.R index 062669c..e124cb9 100644 --- a/R/ons_mortality.R +++ b/R/ons_mortality.R @@ -13,27 +13,28 @@ #' #' @format Data frame with five columns #' \describe{ -#' \item{category_1}{character, containing the names of the groups for counts, e.g. "Total deaths", "all ages".} -#' \item{category_2}{character, subcategory of names of groups where necessary, e.g. details of region: "East", details of age bands "15-44".} +#' \item{category_1}{character, containing the names of the groups for counts, for example "Total deaths", "all ages".} +#' \item{category_2}{character, subcategory of names of groups where necessary, for example details of region: "East", details of age bands "15-44".} #' \item{counts}{numeric, numbers of deaths in whole numbers and average numbers with decimal points. To retain the integrity of the format this column data is left as character.} #' \item{date}{date, format is yyyy-mm-dd; all dates are a Friday.} #' \item{week_no}{integer, each week in a year is numbered sequentially.} #' } #' -#' @source Collected by Zoë Turner \email{zoe.turner2@nottshc.nhs.uk}, Apr-2020 from \url{https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales} +#' @source Collected by Zoë Turner \email{zoe.turner3@nhs.net}, Apr-2020 from \url{https://www.ons.gov.uk/peoplepopulationandcommunity/birthsdeathsandmarriages/deaths/datasets/weeklyprovisionalfiguresondeathsregisteredinenglandandwales} #' #' @usage data(ons_mortality) #' #' @examples #' data(ons_mortality) #' -#'library(dplyr) -#'library(tidyr) +#' library(dplyr) +#' library(tidyr) #' -#'wideForm <- ons_mortality %>% -#' select(-week_no) %>% -#' pivot_wider(names_from = date, -#' values_from = counts -#' ) +#' wideForm <- ons_mortality %>% +#' 
select(-week_no) %>% +#' pivot_wider( +#' names_from = date, +#' values_from = counts +#' ) #' "ons_mortality" diff --git a/R/stranded_patient_model.R b/R/stranded_patient_model.R index 871ff62..795ec48 100644 --- a/R/stranded_patient_model.R +++ b/R/stranded_patient_model.R @@ -20,7 +20,7 @@ #' \item{frailty_index}{An initial index assessment to say if the patient is frail or not. This is needed for alignment of service provision.} #' } #' -#' @source Synthetically generated by Gary Hutson \email{g.hutson@nhs.net}, Mar-2021. +#' @source Synthetically generated by Gary Hutson, Mar-2021. #' #' @usage data(stranded_data) #' @@ -29,6 +29,6 @@ #' library(dplyr) #' data("stranded_data") #' stranded_data %>% -#' glimpse() +#' glimpse() #' "stranded_data" diff --git a/R/synthetic_news_data.R b/R/synthetic_news_data.R index 8013d02..558aa0e 100644 --- a/R/synthetic_news_data.R +++ b/R/synthetic_news_data.R @@ -23,7 +23,7 @@ #' \item{died}{Indicator to monitor patient death} #' } #' -#' @source Generated by Dr. Muhammed Faisal and created by Gary Hutson \email{g.hutson@nhs.net}, Mar-2021 +#' @source Generated by Dr. Muhammed Faisal and created by Gary Hutson, Mar-2021 #' #' @usage data(synthetic_news_data) #' @@ -32,5 +32,5 @@ #' library(dplyr) #' data("synthetic_news_data") #' synthetic_news_data %>% -#' glimpse() +#' glimpse() "synthetic_news_data" diff --git a/README.Rmd b/README.Rmd index 0ac848f..38e96a1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -74,3 +74,14 @@ It's great to see the {NHSRdatasets} package and data used as it promotes the wo The data used to build the mortality dataset in this package is released under © Crown copyright and is free to use under the terms of the Open Government Licence. 
Any subsequent use should include a source accreditation to ONS to help people find the original releases and any statistical corrections that may have occurred since this was included in this package - Source: Office for National Statistics licensed under the Open Government Licence. + +## Contributors + + + + + + + + + diff --git a/data-raw/apha_cpd_survey.R b/data-raw/apha_cpd_survey.R index 75700f8..6fef355 100644 --- a/data-raw/apha_cpd_survey.R +++ b/data-raw/apha_cpd_survey.R @@ -1,13 +1,13 @@ # Source: https://www.aphanalysts.org/documents/cpd-survey-results-raw-data/ # (Accessed 25 January 2024) -# +# # The .xlsx file cannot be directly downloaded in `R` from its URL as there is # protection on the file: it must be downloaded by submitting the form on the # web page, either manually by clicking the button on the page, or by # simulating the form submission using form headers within an {httr2} pipeline. # HTML form code, copied from above URL, and used as source for form headers: -# +# #