From d109af2cc58769c6a5ec636f3c172107fe2f79d6 Mon Sep 17 00:00:00 2001 From: Witold Wolski Date: Thu, 25 Apr 2024 13:04:47 +0200 Subject: [PATCH] introducing tests for factorial designs --- NAMESPACE | 1 + R/ContrastsSimpleImpute.R | 24 ++++++- R/simulate_LFQ_data.R | 6 +- R/tidyMS_missigness_V2.R | 20 +++++- R/tidyMS_stats.R | 84 +++++++++++++++------- man/ContrastsMissing.Rd | 24 ++++++- man/MissingHelpers.Rd | 22 +++++- man/sim_lfq_data_protein_2Factor_config.Rd | 1 + man/summarize_stats.Rd | 22 +++++- man/summarize_stats_factors.Rd | 18 +++++ vignettes/Modelling2Factors.Rmd | 12 ++++ 11 files changed, 198 insertions(+), 36 deletions(-) create mode 100644 man/summarize_stats_factors.Rd diff --git a/NAMESPACE b/NAMESPACE index 2f54503e5..75e76312e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -169,6 +169,7 @@ export(summarise_QValues) export(summarize_hierarchy) export(summarize_stats) export(summarize_stats_all) +export(summarize_stats_factors) export(summarize_stats_quantiles) export(summary_ROPECA_median_p.scaled) export(table_facade) diff --git a/R/ContrastsSimpleImpute.R b/R/ContrastsSimpleImpute.R index c2f7d170e..7e6cd01d5 100644 --- a/R/ContrastsSimpleImpute.R +++ b/R/ContrastsSimpleImpute.R @@ -24,16 +24,38 @@ #' lProt$rename_response("transformedIntensity") #' #' Contr <- c("dil.b_vs_a" = "group_A - group_Ctrl") -#' #ContrastsMissing$debug("get_contrasts") #' csi <- ContrastsMissing$new(lProt, contrasts = Contr) #' csi$get_contrast_sides() #' #' res <- csi$get_contrasts() +#' #' stopifnot(nrow(res) == (protIntensity$protein_Id |> unique() |> length())) #' res$contrast |> table() #' stopifnot((res$p.value |> is.na() |> sum()) == 0) #' plot(res$diff, -log10(res$p.value), pch = ".") #' csi$column_description() +#' x<- csi$get_Plotter() +#' p <- x$volcano() +#' pdf(file = NULL) +#' print(p) +#' dev.off() +#' +#' dd <- prolfqua::sim_lfq_data_protein_2Factor_config(Nprot = 100,weight_missing = 0.1) +#' +#' Contrasts <- c("c1" = "TreatmentA - TreatmentB", +#' "C2" = "BackgroundX- BackgroundZ", +#' "c3" = "`TreatmentA:BackgroundX` - `TreatmentA:BackgroundZ`", +#' "c4" = "`TreatmentB:BackgroundX` - `TreatmentB:BackgroundZ`" +#' ) +#' lProt <- LFQData$new(dd$data, dd$config) +#' lProt$rename_response("transformedIntensity") +#' +#' csi <- ContrastsMissing$new(lProt, contrasts = Contrasts) +#' res <- csi$get_contrasts() +#' pl <- csi$get_Plotter() +#' pdf(file = NULL) +#' pl$volcano() +#' dev.off() ContrastsMissing <- R6::R6Class( "ContrastsMissing", inherit = ContrastsInterface, diff --git a/R/simulate_LFQ_data.R b/R/simulate_LFQ_data.R index 6c86d2162..26c0c2444 100644 --- a/R/simulate_LFQ_data.R +++ b/R/simulate_LFQ_data.R @@ -217,6 +217,7 @@ sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE, weight_ #' @param seed seed for reproducibility, if NULL no seed is set. #' @export #' @examples +#' undebug(sim_lfq_data_protein_2Factor_config) #' x <- sim_lfq_data_protein_2Factor_config() #' stopifnot("data.frame" %in% class(x$data)) #' stopifnot("AnalysisConfiguration" %in% class(x$config)) @@ -228,11 +229,11 @@ sim_lfq_data_protein_2Factor_config <- function(Nprot = 10, if (!is.null(seed)) { set.seed(seed) } - res <- sim_lfq_data(Nprot = 10, PEPTIDE = FALSE, + res <- sim_lfq_data(Nprot = Nprot, PEPTIDE = FALSE, fc = list(A = c(D = -2, U = 2, N = 0), B = c(D = 1, U = -4), C = c(D = -1, U = -4)), prop = list(A = c(D = 10, U = 10), B = c(D = 5, U = 20), C = c(D = 15, U = 25))) res <- res |> mutate(Treatment = case_when(group %in% c("Ctrl", "A") ~ "A", TRUE ~ "B")) - res <- res |> mutate(Background = case_when(group %in% c("Ctrl", "C") ~ "Z", TRUE ~ "X")) + data <- res |> mutate(Background = case_when(group %in% c("Ctrl", "C") ~ "Z", TRUE ~ "X")) if (with_missing) { data <- data[!which_missing(data$abundance,weight_missing = weight_missing),] } @@ -244,6 +245,7 @@ sim_lfq_data_protein_2Factor_config <- function(Nprot = 10, atable$nr_children = "nr_peptides" atable$factors["Treatment"] = "Treatment" atable$factors["Background"] = "Background" + atable$factorDepth <- 2 atable$hierarchy[["protein_Id"]] = c("proteinID", "idtype2") atable$set_response("abundance") diff --git a/R/tidyMS_missigness_V2.R b/R/tidyMS_missigness_V2.R index c32cd9f1e..29db4625a 100644 --- a/R/tidyMS_missigness_V2.R +++ b/R/tidyMS_missigness_V2.R @@ -12,8 +12,20 @@ #' mh$impute_weighted_lod() #' mh$impute_lod() #' mh$get_poolvar() -#' mh$get_contrast_estimates(Contrasts) +#' bb <- mh$get_contrast_estimates(Contrasts) #' mh$get_contrasts(Contrasts) +#' +#' dd <- prolfqua::sim_lfq_data_protein_2Factor_config(Nprot = 100,weight_missing = 0.1) +#' +#' Contrasts <- c("c1" = "TreatmentA - TreatmentB", +#' "C2" = "BackgroundX- BackgroundZ", +#' "c3" = "`TreatmentA:BackgroundX` - `TreatmentA:BackgroundZ`", +#' "c4" = "`TreatmentB:BackgroundX` - `TreatmentB:BackgroundZ`" +#' ) +#' mh <- prolfqua::MissingHelpers$new(dd$data, dd$config, prob = 0.8,weighted = TRUE) +#' mh$get_stats()$interaction |> table() +#' mh$get_contrast_estimates(Contrasts) +#' MissingHelpers <- R6::R6Class( "MissingHelpers", @@ -44,8 +56,7 @@ MissingHelpers <- R6::R6Class( }, get_stats = function(){ if (is.null(self$stats)) { - self$stats = prolfqua::summarize_stats(self$data, self$config) - self$stats = prolfqua::make_interaction_column(self$stats, columns = self$config$table$factor_keys_depth(), sep = ":") + self$stats = prolfqua::summarize_stats_factors(self$data, self$config) } return(self$stats) }, @@ -90,6 +101,9 @@ MissingHelpers <- R6::R6Class( pooled <- pooled |> mutate(df = ifelse(df == 0, 1, df)) return(pooled) }, + #' @description + #' get contrast estimates + #' @param Contrasts named array with contrasts get_contrast_estimates = function( Contrasts ){ diff --git a/R/tidyMS_stats.R b/R/tidyMS_stats.R index a94c60cbd..937a50d7a 100644 --- a/R/tidyMS_stats.R +++ b/R/tidyMS_stats.R @@ -162,11 +162,28 @@ poolvar <- function(res1, config, method = c("V1","V2")){ #' #' res1 <- summarize_stats(data, config) #' -summarize_stats <- function(pdata, config){ +#' res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +#' res2$config$table$factorDepth <- 2 +#' stats <- summarize_stats(res2$data, res2$config) +#' stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys_depth(), sep = ":") +#' stopifnot(nrow(stats) == 40) +#' +#' stats <- summarize_stats(res2$data, res2$config, factor_key = res2$config$table$factor_keys()[1]) +#' stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys()[1], sep = ":") +#' stopifnot(nrow(stats) == 20) +#' stats <- summarize_stats(res2$data, res2$config, factor_key = res2$config$table$factor_keys()[2]) +#' stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys()[2], sep = ":") +#' stopifnot(nrow(stats) == 20) +#' +#' stats <- summarize_stats(res2$data, res2$config, factor_key = NULL) +#' stopifnot(nrow(stats) == 10) +#' +summarize_stats <- function(pdata, config, factor_key = config$table$factor_keys_depth()){ + print(factor_key) pdata <- complete_cases(pdata, config) intsym <- sym(config$table$get_response()) hierarchyFactor <- pdata |> - dplyr::group_by(!!!syms( c(config$table$hierarchy_keys(), config$table$factor_keys_depth()) )) |> + dplyr::group_by(!!!syms( c(config$table$hierarchy_keys(), factor_key) )) |> dplyr::summarize(nrReplicates = dplyr::n(), nrMeasured = sum(!is.na(!!intsym)), nrNAs = sum(is.na(!!intsym)), @@ -177,13 +194,46 @@ summarize_stats <- function(pdata, config){ .groups = "drop_last") |> dplyr::ungroup() hierarchyFactor <- hierarchyFactor |> - dplyr::mutate(dplyr::across(config$table$factor_keys_depth(), as.character)) + dplyr::mutate(dplyr::across(all_of(factor_key), as.character)) if (config$table$is_response_transformed == FALSE) { - hierarchyFactor |> dplyr::mutate(CV = sd/meanAbundance * 100) -> hierarchyFactor + hierarchyFactor <- hierarchyFactor |> dplyr::mutate(CV = sd/meanAbundance * 100) + } + if (is.null(factor_key)) { + hierarchyFactor <- dplyr::mutate(hierarchyFactor, !!config$table$factor_keys()[1] := "All") } return(ungroup(hierarchyFactor)) } + +#' compute var sd etc for all factor levels +#' +#' @export +#' @examples +#' # example code +#' res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +#' xx <- summarize_stats_factors(res2$data, res2$config) +#' stopifnot(nrow(xx) == 80) +#' +summarize_stats_factors <- function(pdata, config){ + fac_res <- list() + stats <- summarize_stats( + pdata, + config) + fac_res[["interaction"]] <- prolfqua::make_interaction_column(stats, columns = config$table$factor_keys_depth(),sep = ":") + + if (config$table$factorDepth > 1 ) { # if 1 only then done + for (factor in config$table$factor_keys_depth()) { + stats <- summarize_stats( + pdata, + config,factor_key = factor) + fac_res[[factor]] <- prolfqua::make_interaction_column(stats, columns = factor, sep = ":") + } + } + intfact <- dplyr::bind_rows(fac_res) + return(intfact) +} + + #' Compute mean, sd, and CV for e.g. Peptides, or proteins, for all samples. #' #' @param pdata data.frame @@ -201,28 +251,13 @@ summarize_stats <- function(pdata, config){ #' res1 <- summarize_stats_all(bb$data, bb$config) #' #' stopifnot((res1 |> dplyr::filter(group_ == "All") |> nrow()) == (res1 |> nrow())) -#' -summarize_stats_all <- function(pdata, config){ - pdata <- complete_cases(pdata, config) - intsym <- sym(config$table$get_response()) - hierarchy <- pdata |> - dplyr::group_by(!!!syms( config$table$hierarchy_keys() )) |> - dplyr::summarize(nrReplicates = dplyr::n(), - nrMeasured = sum(!is.na(!!intsym)), - sd = sd(!!intsym,na.rm = TRUE), - var = sd(!!intsym,na.rm = TRUE), - meanAbundance = mean(!!intsym,na.rm = TRUE), - medianAbundance = median(!!intsym, na.rm = TRUE), - .groups = "drop_last") |> dplyr::ungroup() - - hierarchy <- dplyr::mutate(hierarchy, !!config$table$factor_keys()[1] := "All") - hierarchyFactor <- hierarchy - if (config$table$is_response_transformed == FALSE) { - hierarchyFactor |> dplyr::mutate(CV = sd/meanAbundance * 100) -> hierarchyFactor - } - return(ungroup(hierarchyFactor)) +#' res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +#' resSt <- summarize_stats_all(res2$data, res2$config) +summarize_stats_all <- function(pdata, config) { + summarize_stats(pdata, config, factor_key = NULL) } + #' summarize stats output (compute quantiles) #' @param stats_res result of running `summarize_stats` #' @param config AnalysisConfiguration @@ -247,7 +282,6 @@ summarize_stats_all <- function(pdata, config){ #' stats_res <- summarize_stats(data, config) #' sq <- summarize_stats_quantiles(stats_res, config) #' sq <- summarize_stats_quantiles(stats_res, config, stats = "sd") -#' #' stats_res <- summarize_stats(data, config) #' xx <- summarize_stats_quantiles(stats_res, config, probs = seq(0,1,by = 0.1)) #' ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = group_)) + geom_line() + geom_point() diff --git a/man/ContrastsMissing.Rd b/man/ContrastsMissing.Rd index e24d41916..5bf5dee93 100644 --- a/man/ContrastsMissing.Rd +++ b/man/ContrastsMissing.Rd @@ -29,16 +29,38 @@ lProt <- LFQData$new(protIntensity, config) lProt$rename_response("transformedIntensity") Contr <- c("dil.b_vs_a" = "group_A - group_Ctrl") -#ContrastsMissing$debug("get_contrasts") csi <- ContrastsMissing$new(lProt, contrasts = Contr) csi$get_contrast_sides() res <- csi$get_contrasts() + stopifnot(nrow(res) == (protIntensity$protein_Id |> unique() |> length())) res$contrast |> table() stopifnot((res$p.value |> is.na() |> sum()) == 0) plot(res$diff, -log10(res$p.value), pch = ".") csi$column_description() +x<- csi$get_Plotter() +p <- x$volcano() +pdf(file = NULL) +print(p) +dev.off() + +dd <- prolfqua::sim_lfq_data_protein_2Factor_config(Nprot = 100,weight_missing = 0.1) + +Contrasts <- c("c1" = "TreatmentA - TreatmentB", + "C2" = "BackgroundX- BackgroundZ", + "c3" = "`TreatmentA:BackgroundX` - `TreatmentA:BackgroundZ`", + "c4" = "`TreatmentB:BackgroundX` - `TreatmentB:BackgroundZ`" + ) +lProt <- LFQData$new(dd$data, dd$config) +lProt$rename_response("transformedIntensity") + +csi <- ContrastsMissing$new(lProt, contrasts = Contrasts) +res <- csi$get_contrasts() +pl <- csi$get_Plotter() +pdf(file = NULL) +pl$volcano() +dev.off() } \seealso{ Other modelling: diff --git a/man/MissingHelpers.Rd b/man/MissingHelpers.Rd index 61805065b..40d87c355 100644 --- a/man/MissingHelpers.Rd +++ b/man/MissingHelpers.Rd @@ -20,8 +20,20 @@ mh$get_LOD() mh$impute_weighted_lod() mh$impute_lod() mh$get_poolvar() -mh$get_contrast_estimates(Contrasts) +bb <- mh$get_contrast_estimates(Contrasts) mh$get_contrasts(Contrasts) + +dd <- prolfqua::sim_lfq_data_protein_2Factor_config(Nprot = 100,weight_missing = 0.1) + +Contrasts <- c("c1" = "TreatmentA - TreatmentB", + "C2" = "BackgroundX- BackgroundZ", + "c3" = "`TreatmentA:BackgroundX` - `TreatmentA:BackgroundZ`", + "c4" = "`TreatmentB:BackgroundX` - `TreatmentB:BackgroundZ`" + ) +mh <- prolfqua::MissingHelpers$new(dd$data, dd$config, prob = 0.8,weighted = TRUE) +mh$get_stats()$interaction |> table() +mh$get_contrast_estimates(Contrasts) + } \section{Public fields}{ \if{html}{\out{
}} @@ -131,10 +143,18 @@ initialize \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-MissingHelpers-get_contrast_estimates}{}}} \subsection{Method \code{get_contrast_estimates()}}{ +get contrast estimates \subsection{Usage}{ \if{html}{\out{
}}\preformatted{MissingHelpers$get_contrast_estimates(Contrasts)}\if{html}{\out{
}} } +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{Contrasts}}{named array with contrasts} +} +\if{html}{\out{
}} +} } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/man/sim_lfq_data_protein_2Factor_config.Rd b/man/sim_lfq_data_protein_2Factor_config.Rd index f39e0ebeb..16d20ad24 100644 --- a/man/sim_lfq_data_protein_2Factor_config.Rd +++ b/man/sim_lfq_data_protein_2Factor_config.Rd @@ -22,6 +22,7 @@ sim_lfq_data_protein_2Factor_config( Simulate data, protein, with config with 2 factros Treatment and Background } \examples{ +undebug(sim_lfq_data_protein_2Factor_config) x <- sim_lfq_data_protein_2Factor_config() stopifnot("data.frame" \%in\% class(x$data)) stopifnot("AnalysisConfiguration" \%in\% class(x$config)) diff --git a/man/summarize_stats.Rd b/man/summarize_stats.Rd index 06bd61f58..d525802ff 100644 --- a/man/summarize_stats.Rd +++ b/man/summarize_stats.Rd @@ -6,7 +6,7 @@ \alias{summarize_stats_quantiles} \title{Compute mean, sd, and CV for all Peptides, or proteins, for all interactions and all samples.} \usage{ -summarize_stats(pdata, config) +summarize_stats(pdata, config, factor_key = config$table$factor_keys_depth()) summarize_stats_all(pdata, config) @@ -46,6 +46,22 @@ data <- bb$data res1 <- summarize_stats(data, config) +res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +res2$config$table$factorDepth <- 2 +stats <- summarize_stats(res2$data, res2$config) +stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys_depth(), sep = ":") +stopifnot(nrow(stats) == 40) + +stats <- summarize_stats(res2$data, res2$config, factor_key = res2$config$table$factor_keys()[1]) +stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys()[1], sep = ":") +stopifnot(nrow(stats) == 20) +stats <- summarize_stats(res2$data, res2$config, factor_key = res2$config$table$factor_keys()[2]) +stats <- prolfqua::make_interaction_column(stats, columns = res2$config$table$factor_keys()[2], sep = ":") +stopifnot(nrow(stats) == 20) + +stats <- summarize_stats(res2$data, res2$config, factor_key = NULL) +stopifnot(nrow(stats) == 10) + bb <- prolfqua::sim_lfq_data_protein_config() @@ -53,7 +69,8 @@ bb <- prolfqua::sim_lfq_data_protein_config() res1 <- summarize_stats_all(bb$data, bb$config) stopifnot((res1 |> dplyr::filter(group_ == "All") |> nrow()) == (res1 |> nrow())) - +res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +resSt <- summarize_stats_all(res2$data, res2$config) library(ggplot2) bb1 <- prolfqua::sim_lfq_data_peptide_config() config <- bb1$config @@ -68,7 +85,6 @@ config$table$get_response() stats_res <- summarize_stats(data, config) sq <- summarize_stats_quantiles(stats_res, config) sq <- summarize_stats_quantiles(stats_res, config, stats = "sd") - stats_res <- summarize_stats(data, config) xx <- summarize_stats_quantiles(stats_res, config, probs = seq(0,1,by = 0.1)) ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = group_)) + geom_line() + geom_point() diff --git a/man/summarize_stats_factors.Rd b/man/summarize_stats_factors.Rd new file mode 100644 index 000000000..7a6be91e3 --- /dev/null +++ b/man/summarize_stats_factors.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyMS_stats.R +\name{summarize_stats_factors} +\alias{summarize_stats_factors} +\title{compute var sd etc for all factor levels} +\usage{ +summarize_stats_factors(pdata, config) +} +\description{ +compute var sd etc for all factor levels +} +\examples{ +# example code +res2 <- prolfqua::sim_lfq_data_protein_2Factor_config() +xx <- summarize_stats_factors(res2$data, res2$config) +stopifnot(nrow(xx) == 80) + +} diff --git a/vignettes/Modelling2Factors.Rmd b/vignettes/Modelling2Factors.Rmd index 4d01ea97d..d0e56ef69 100644 --- a/vignettes/Modelling2Factors.Rmd +++ b/vignettes/Modelling2Factors.Rmd @@ -110,11 +110,23 @@ Still using the approach above, we can only estimate group averages in case ther ```{r prepareForGroupAverageImputation, fig.cap="Volcano and MA plot for result visualisation for the group average model"} pMerged$config$table$factor_keys_depth() + +debug(prolfqua:::get_impute_contrasts_V1) +prolfqua:::get_impute_contrasts_V1(pMerged,Contrasts) + +mh <- MissingHelpers$new(pMerged$data, pMerged$config) +mh <- mh$get_stats() +mh$interaction |> table() + +ContrastsMissing$debug("get_contrasts") + contrSimple <- prolfqua::ContrastsMissing$new(pMerged, Contrasts) contrdfSimple <- contrSimple$get_contrasts() pl <- contrSimple$get_Plotter() pl$histogram_diff() pl$volcano()$FDR + + ``` ## Merge nonimputed and imputed data.