diff --git a/R/AnalysisConfiguration.R b/R/AnalysisConfiguration.R index 7a5cbdbb3..ce0f18eba 100644 --- a/R/AnalysisConfiguration.R +++ b/R/AnalysisConfiguration.R @@ -62,13 +62,10 @@ make_reduced_hierarchy_config <- function(config, workIntensity , hierarchy ){ #' # debug(make_interaction_column) #' x <- make_interaction_column(xx, c("B","A")) #' x <- make_interaction_column(xx, c("A")) -#' bb <- prolfqua_data('data_ionstar')$filtered() -#' bb$config <- old2new(bb$config) -#' stopifnot(nrow(bb$data) == 25780) -#' config <- bb$config$clone(deep=TRUE) +#' bb <- prolfqua::sim_lfq_data_protein_config() +#' config <- bb$config #' analysis <- bb$data #' -#' config$table$factor_keys() #' config$table$factorDepth <- 1 #' make_interaction_column(analysis, #' config$table$factor_keys_depth()) diff --git a/R/ContrastsSimpleImpute.R b/R/ContrastsSimpleImpute.R index eebf83522..4c296c400 100644 --- a/R/ContrastsSimpleImpute.R +++ b/R/ContrastsSimpleImpute.R @@ -22,6 +22,8 @@ #' #' Contr <- c("dil.b_vs_a" = "group_A - group_Ctrl") #' csi <- ContrastsMissing$new(lProt, contrasts = Contr) +#' csi$get_contrast_sides() +#' #' res <- csi$get_contrasts() #' ContrastsMissing <- R6::R6Class( @@ -120,8 +122,8 @@ ContrastsMissing <- R6::R6Class( result <- dplyr::inner_join(result, pooled, by = self$lfqdata$config$table$hierarchy_keys_depth()) - result_sd_zero <- result[result$n == 0, ] - resultnot_zero <- result[result$n > 0,] + result_sd_zero <- result[result$nrMeasured == 0, ] + resultnot_zero <- result[result$nrMeasured > 0,] meandf <- resultnot_zero |> summarize(n = 1, df = 1, sd = mean(sd, na.rm = TRUE), sdT = mean(sdT, na.rm = TRUE)) meandf$sd <- ifelse(meandf$sd > 0, meandf$sd, self$minsd) @@ -144,7 +146,7 @@ ContrastsMissing <- R6::R6Class( result <- self$p.adjust(result, column = "p.value", group_by_col = "contrast", newname = "FDR") if (!all) { - result <- select(result, -all_of( c("isSingular", "not_na" , "mean" ,"n.groups", "n", "meanAll") ) ) + result <- select(result, -all_of( c("isSingular", "nrMeasured" , "mean" ,"n.groups", "n", "meanAll") ) ) } } diff --git a/R/LFQDataStats.R b/R/LFQDataStats.R index 847e941ed..0253edb7f 100644 --- a/R/LFQDataStats.R +++ b/R/LFQDataStats.R @@ -35,7 +35,7 @@ #' lfqstats$stats_wide() #' lfqstats$violin() #' runallfuncs(lfqstats) -#' x<-lfqstats +#' x <- lfqstats #' #' #study variance of normalized data #' diff --git a/R/simulate_LFQ_data.R b/R/simulate_LFQ_data.R index e785dc25a..4a4b6e44d 100644 --- a/R/simulate_LFQ_data.R +++ b/R/simulate_LFQ_data.R @@ -123,13 +123,13 @@ sim_lfq_data <- function( #' add missing values to x vector based on the values of x #' @export #' @param x vector of intensities +#' @param weight_missing greater weight more missing #' -#' -which_missing <- function(x){ +which_missing <- function(x, weight_missing = 0.2){ missing_prop <- pnorm(x, mean = mean(x), sd = sd(x)) # sample TRUE or FALSE with propability in missing_prop samplemiss <- function(missing_prop) { - mp <- c((1 - missing_prop)*0.2, missing_prop*3) + mp <- c((1 - missing_prop)*weight_missing, missing_prop*3) mp <- mp / sum(mp) sample(c(TRUE, FALSE), size = 1, replace = TRUE, prob = mp) } @@ -151,13 +151,13 @@ which_missing <- function(x){ #' x <- sim_lfq_data_peptide_config() #' stopifnot("data.frame" %in% class(x$data)) #' stopifnot("AnalysisConfiguration" %in% class(x$config)) -sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE, seed = 1234){ +sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE, weight_missing = 0.2, seed = 1234){ if (!is.null(seed)) { set.seed(seed) } data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = TRUE) if (with_missing) { - not_missing <- !which_missing(data$abundance) + not_missing <- !which_missing(data$abundance, weight_missing = weight_missing) data <- data[not_missing,] } data$isotopeLabel <- "light" @@ -185,13 +185,13 @@ sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE, seed = #' stopifnot("data.frame" %in% class(x$data)) #' stopifnot("AnalysisConfiguration" %in% class(x$config)) #' -sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE, seed = 1234){ +sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE, weight_missing = 0.2, seed = 1234){ if (!is.null(seed)) { set.seed(seed) } data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = FALSE) if (with_missing) { - data <- data[!which_missing(data$abundance),] + data <- data[!which_missing(data$abundance,weight_missing = weight_missing),] } data$isotopeLabel <- "light" data$qValue <- 0 diff --git a/R/tidyMS_R6Model.R b/R/tidyMS_R6Model.R index 608d79ba5..86643c107 100644 --- a/R/tidyMS_R6Model.R +++ b/R/tidyMS_R6Model.R @@ -87,35 +87,30 @@ LR_test <- function(modelProteinF, #' #' @export #' @examples -#' # library(tidyverse) -#' D <- prolfqua_data('data_ionstar')$normalized() -#' D$config <- old2new(D$config) -#' D$data <- dplyr::filter(D$data ,protein_Id %in% sample(protein_Id, 100)) -#' +#' D <- prolfqua::sim_lfq_data_peptide_config(Nprot = 20, weight_missing = 0.1) +#' D$data$abundance |> is.na() |> sum() +#' D <- prolfqua::sim_lfq_data_peptide_config(Nprot = 20, weight_missing = 0.1, seed =3) +#' D$data$abundance |> is.na() |> sum() #' modelName <- "f_condtion_r_peptide" #' formula_randomPeptide <- -#' strategy_lmer("transformedIntensity ~ dilution. + (1 | peptide_Id) + (1 | sampleName)", +#' strategy_lmer("abundance ~ group_ + (1 | peptide_Id) + (1 | sampleName)", #' model_name = modelName) #' #' -#' pepIntensity <- D$data -#' config <- D$config -#' -#' -#' -#' mod <- prolfqua:::build_model( -#' pepIntensity, +#' mod <- prolfqua::build_model( +#' D$data, #' formula_randomPeptide, #' modelName = modelName, -#' subject_Id = config$table$hierarchy_keys_depth()) -#' mod$get_anova() +#' subject_Id = D$config$table$hierarchy_keys_depth()) +#' aovtable <- mod$get_anova() #' -#' mod <- prolfqua:::build_model( -#' LFQData$new(pepIntensity, config), +#' mod <- prolfqua::build_model( +#' LFQData$new(D$data, D$config), #' formula_randomPeptide, #' modelName = modelName) #' model_summary(mod) #' +#' build_model <- function(data, model_strategy, subject_Id = if ("LFQData" %in% class(data)) {data$subject_Id()} else {"protein_Id"}, diff --git a/R/tidyMS_aggregation.R b/R/tidyMS_aggregation.R index 876c7c2ea..94fb3a4b6 100644 --- a/R/tidyMS_aggregation.R +++ b/R/tidyMS_aggregation.R @@ -137,22 +137,18 @@ plot_hierarchies_line <- function(res, #' config$table$is_response_transformed <- FALSE #' #debug(plot_hierarchies_line_df) #' res <- plot_hierarchies_line_df(istar_data, config) -#' res[[1]] #' #' config$table$is_response_transformed <- TRUE #' res <- plot_hierarchies_line_df(istar_data, config) -#' res[[1]] #' #' istar <- prolfqua_data('data_ionstar')$filtered() #' istar_data <- istar$data |> dplyr::filter(protein_Id %in% sample(protein_Id, 20)) #' config <- old2new(istar$config) #' res <- plot_hierarchies_line_df(istar_data, config) #' config$table$is_response_transformed -#' res[[1]] #' config$table$is_response_transformed <- TRUE #' res <- plot_hierarchies_line_df(istar_data, config) #' config$table$is_response_transformed -#' res[[1]] #' #' #TODO make it work for other hiearachy levels. #' config$table$hierarchyDepth = 2 diff --git a/R/tidyMS_missigness.R b/R/tidyMS_missigness.R index 2771c40e7..53b6321c6 100644 --- a/R/tidyMS_missigness.R +++ b/R/tidyMS_missigness.R @@ -20,23 +20,24 @@ #' config) #' xx <- complete_cases(xx, config) #' x <- interaction_missing_stats(xx, config)$data |> dplyr::arrange(desc(nrNAs)) -#' +#' nrow(x) #' tmp <- interaction_missing_stats(xx, config, #' factors= character(), #' hierarchy = config$table$hierarchy_keys()[1])$data -#' +#' stopifnot(nrow(tmp) == 10) #' tmp <- interaction_missing_stats(xx, config, #' hierarchy = config$table$hierarchy_keys()[1])$data +#' stopifnot(nrow(tmp) == length(unique(xx$protein_Id))* length(unique(xx$group_))) #' stopifnot(sum(is.na(tmp$nrMeasured))==0) #' #' tmp <- interaction_missing_stats(xx, config, factors = NULL) -#' interaction_missing_stats <- function(pdata, config, factors = config$table$factor_keys_depth(), hierarchy = config$table$hierarchy_keys(), workIntensity = config$table$get_response()) { + warning(">>>> deprecated! <<<<") pdata <- complete_cases(pdata, config) table <- config$table missingPrec <- pdata |> group_by_at(c(factors, @@ -70,28 +71,23 @@ interaction_missing_stats <- function(pdata, #' @return function #' @examples #' -#' istar <- sim_lfq_data_peptide_config() +#' istar <- sim_lfq_data_peptide_config(Nprot = 20,weight_missing = 2) #' config <- istar$config #' analysis <- istar$data -#' config$parameter$qVal_individual_threshold <- 0.01 -#' -#' xx <- prolfqua::remove_large_QValues(analysis, config) -#' xx <- complete_cases(xx, config) +#' xx <- complete_cases(analysis, config) #' nrPepTimesDilution <- length(unique(paste0(xx$protein_Id, xx$peptide_Id))) * -#' length(unique(xx$dilution.)) -#' tmp <- interaction_missing_stats(xx, config) -#' fun <- .missigness_impute_interactions(xx, config) -#' -#' long <- fun("long") -#' alldata <- fun("all") +#' length(unique(xx$group_)) +#' funx <- .missigness_impute_interactions(xx, config) +#' long <- funx("long") +#' alldata <- funx("all") #' stopifnot(length(names(alldata)) == 5) #' -#' imputed <- fun("imputed") +#' imputed <- funx("imputed") #' stopifnot(nrow(imputed) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) -#' missing <- fun("nrMeasured") +#' missing <- funx("nrMeasured") #' stopifnot(nrow(missing) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) #' -#' meanAbundance <- fun("mean") +#' meanAbundance <- funx("mean") #' stopifnot(nrow(meanAbundance) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) #' stopifnot(sum(is.na(imputed$mean.imp.group_A))==0) #' @@ -105,7 +101,6 @@ interaction_missing_stats <- function(pdata, mstats <- mstats$data mstats <- make_interaction_column(mstats, factors, sep = ":") - lowerMean <- function(meanAbundance, probs = probs){ meanAbundanceNotNA <- na.omit(meanAbundance) small10 <- meanAbundanceNotNA[meanAbundanceNotNA < quantile(meanAbundanceNotNA, probs = probs)] @@ -120,7 +115,6 @@ interaction_missing_stats <- function(pdata, }else{ mstats <- mstats |> dplyr::mutate(imputed = lowerMean(.data$meanAbundance,probs = probs)) - } res_fun <- function(value = c("long", @@ -146,19 +140,19 @@ interaction_missing_stats <- function(pdata, nrReplicates <- mstats |> dplyr::select( -one_of(c(base::setdiff(x_summaries,"nrReplicates"),"imputed") )) |> tidyr::spread(interaction, nrReplicates, sep = ".nrReplicates.") |> - arrange(!!!syms(pid)) |> + dplyr::arrange(!!!syms(pid)) |> dplyr::ungroup() nrMeasured <- mstats |> dplyr::select(-one_of(c(base::setdiff(x_summaries,"nrMeasured"),"imputed" ) )) |> tidyr::spread(interaction, nrMeasured, sep = ".nrMeasured.") |> - arrange(!!!syms(pid)) |> dplyr::ungroup() + dplyr::arrange(!!!syms(pid)) |> dplyr::ungroup() meanAbundance <- mstats |> dplyr::select(-one_of(c(base::setdiff(x_summaries,"meanAbundance"),"imputed" ) )) |> tidyr::spread(interaction, meanAbundance, sep = ".meanAbundance.") |> - arrange(!!!syms(pid)) |> dplyr::ungroup() + dplyr::arrange(!!!syms(pid)) |> dplyr::ungroup() meanAbundanceImputed <- mstats |> dplyr::select(-one_of(base::setdiff(x_summaries,"imputed" ) )) |> tidyr::spread(interaction, .data$imputed, sep = ".imputed.") |> - arrange(!!!syms(pid)) |> dplyr::ungroup() + dplyr::arrange(!!!syms(pid)) |> dplyr::ungroup() allTables <- list(meanAbundance = meanAbundance, nrMeasured = nrMeasured, @@ -214,59 +208,60 @@ interaction_missing_stats <- function(pdata, #' @family imputation #' @examples #' -#' istar <- sim_lfq_data_peptide_config() +#' istar <- sim_lfq_data_peptide_config(weight_missing = 2) #' config <- istar$config #' analysis <- istar$data #' #' xx <- complete_cases(analysis, config) -#' #' res <- missigness_impute_factors_interactions(xx, config) #' res <- missigness_impute_factors_interactions(xx, config, value = "imputed") #' res <- missigness_impute_factors_interactions(xx, config, value = "nrMeasured") +#' #debug(missigness_impute_factors_interactions) #' long <- missigness_impute_factors_interactions(xx, config, value = "long") -#' -missigness_impute_factors_interactions <- - function(pdata, - config, - probs = 0.03, - value = c("long", "nrReplicates", "nrMeasured", "meanAbundance", "imputed"), - add.prefix = FALSE, - global = TRUE) - { - value <- match.arg(value) - fac_fun <- list() - fac_fun[["interaction"]] <- .missigness_impute_interactions( - pdata, - config, - probs = probs, - global = global) - if (config$table$factorDepth > 1 ) { # if 1 only then done - for (factor in config$table$factor_keys_depth()) { - fac_fun[[factor]] <- .missigness_impute_interactions( - pdata, - config, - factors = factor, - probs = probs, - global = global) - } +#' head(long) +#' plot(long$meanAbundance, long$imputed) +missigness_impute_factors_interactions <- function( + pdata, + config, + probs = 0.03, + value = c("long", "nrReplicates", "nrMeasured", "meanAbundance", "imputed"), + add.prefix = FALSE, + global = TRUE) +{ + value <- match.arg(value) + fac_fun <- list() + fac_fun[["interaction"]] <- .missigness_impute_interactions( + pdata, + config, + probs = probs, + global = global) + if (config$table$factorDepth > 1 ) { # if 1 only then done + for (factor in config$table$factor_keys_depth()) { + fac_fun[[factor]] <- .missigness_impute_interactions( + pdata, + config, + factors = factor, + probs = probs, + global = global) } + } - fac_res <- vector(mode = "list", length = length(fac_fun)) - names(fac_res) <- names(fac_fun) - for (fun_name in names(fac_fun)) { - fac_res[[fun_name]] <- fac_fun[[fun_name]](value, add.prefix = add.prefix) - } - if (value == "long") { - intfact <- dplyr::bind_rows(fac_res) - } else { - intfact <- purrr::reduce(fac_res, - dplyr::inner_join, - by = c(config$table$hierarchy_keys(), - config$table$isotopeLabel, "value")) + fac_res <- vector(mode = "list", length = length(fac_fun)) + names(fac_res) <- names(fac_fun) + for (fun_name in names(fac_fun)) { + fac_res[[fun_name]] <- fac_fun[[fun_name]](value, add.prefix = add.prefix) + } + if (value == "long") { + intfact <- dplyr::bind_rows(fac_res) + } else { + intfact <- purrr::reduce(fac_res, + dplyr::inner_join, + by = c(config$table$hierarchy_keys(), + config$table$isotopeLabel, "value")) - } - return(dplyr::ungroup(intfact)) } + return(dplyr::ungroup(intfact)) +} @@ -364,8 +359,6 @@ get_contrast <- function(data, hierarchy_keys, contrasts) { - - for (i in seq_along(contrasts)) { message(names(contrasts)[i], "=", contrasts[i],"\n") data <- dplyr::mutate(data, !!names(contrasts)[i] := !!rlang::parse_expr(contrasts[i])) @@ -386,7 +379,6 @@ get_contrast <- function(data, res[[names(contrasts)[i]]] <- df } res <- dplyr::bind_rows(res) - return(dplyr::ungroup(res)) } @@ -432,6 +424,7 @@ get_imputed_contrasts <- function(pepIntensity, stop("At least 1 observation in interaction to infer LOD.") } long <- missigness_impute_factors_interactions(pepIntensity, config, value = "long" ) + # determine limit of detection LOD <- long |> filter(nrNAs == nrReplicates - present) |> pull(meanAbundance) |> median(na.rm=TRUE) long <- tidyr::complete(long, tidyr::nesting(!!!syms(config$table$hierarchy_keys())), interaction) @@ -492,7 +485,7 @@ missigness_histogram <- function(x, if (config$table$is_response_transformed) { missingPrec <- missingPrec |> dplyr::mutate(meanAbundance = ifelse(is.na(.data$meanAbundance), min(.data$meanAbundance, na.rm = TRUE) - 1, - .data$meanAbundance)) + .data$meanAbundance)) }else{ missingPrec <- missingPrec |> dplyr::mutate(meanAbundance = ifelse(is.na(.data$meanAbundance),min(.data$meanAbundance, na.rm = TRUE) - 20,.data$meanAbundance)) diff --git a/R/tidyMS_stats.R b/R/tidyMS_stats.R index 7ce609dc3..a94c60cbd 100644 --- a/R/tidyMS_stats.R +++ b/R/tidyMS_stats.R @@ -4,8 +4,8 @@ #' @return data.frame #' @examples #' -#' x <- data.frame(not_na =c(1,2,2), var = c(3,4,4), mean = c(3,3,3)) -#' x <- data.frame(not_na =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), mean = c(-1.94,-1.46,-1.87,-1.45) ) +#' x <- data.frame(nrMeasured =c(1,2,2), var = c(3,4,4), meanAbundance = c(3,3,3)) +#' x <- data.frame(nrMeasured =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), meanAbundance = c(-1.94,-1.46,-1.87,-1.45) ) #' prolfqua:::pooled_V2(na.omit(x)) #' prolfqua:::pooled_V1(na.omit(x)) #' x <- x[1,, drop=FALSE] @@ -14,9 +14,9 @@ #' prolfqua:::pooled_V2(na.omit(x)) pooled_V2 <- function(x){ - n <- x$not_na + n <- x$nrMeasured sample.var <- x$var - sample.mean <- x$mean + sample.mean <- x$meanAbundance pool.n <- sum(n) pool.mean <- sum(n * sample.mean)/pool.n @@ -44,9 +44,9 @@ pooled_V2 <- function(x){ #' @rdname pooled_var #' @param x data.frame pooled_V1 <- function(x){ - n <- x$not_na + n <- x$nrMeasured sample.var <- x$var - sample.mean <- x$mean + sample.mean <- x$meanAbundance pool.n <- sum(n) n.groups <- length(sample.var) @@ -83,34 +83,34 @@ pooled_V1 <- function(x){ #' @family stats #' #' @examples -#' x <- data.frame(not_na =c(1,2,2), var = c(3,4,4), mean = c(3,3,3)) -#' x <- data.frame(not_na =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), mean = c(-1.94,-1.46,-1.87,-1.45) ) +#' x <- data.frame(nrMeasured =c(1,2,2), var = c(3,4,4), meanAbundance = c(3,3,3)) +#' x <- data.frame(nrMeasured =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), meanAbundance = c(-1.94,-1.46,-1.87,-1.45) ) #' compute_pooled(x) #' compute_pooled(x, method = "V2") #' #debug(compute_pooled) #' y <- data.frame(dilution.=c("a","b","c"), -#' n = c(4,4,4), not_na = c(0,0,1), sd =c(NA,NA,NA), -#' var = c(NA,NA,NA),mean = c(NaN,NaN,NaN)) +#' nrReplicates = c(4,4,4), nrMeasured = c(0,0,1), sd =c(NA,NA,NA), +#' var = c(NA,NA,NA),meanAbundance = c(NaN,NaN,NaN)) #' compute_pooled(y) -#' yb <- y |> dplyr::filter(not_na > 1) +#' yb <- y |> dplyr::filter(nrMeasured > 1) compute_pooled <- function(x, method = c("V1","V2")){ method <- match.arg(method) - xm <- x |> dplyr::filter(.data$not_na > 0) - meanAll <- sum(xm$mean * xm$not_na)/sum(xm$not_na) - not_na = sum(xm$not_na) + xm <- x |> dplyr::filter(.data$nrMeasured > 0) + meanAll <- sum(xm$meanAbundance * xm$nrMeasured)/sum(xm$nrMeasured) + nrMeasured = sum(xm$nrMeasured) func <- pooled_V1 if (method == "V2") { func <- pooled_V2 } - x <- x |> dplyr::filter(.data$not_na > 1) + x <- x |> dplyr::filter(.data$nrMeasured > 1) res <- func(x) if (is.na(res$mean)) { res$mean <- meanAll } res$meanAll <- meanAll - res$not_na <- not_na + res$nrMeasured <- nrMeasured return(res) } @@ -121,14 +121,13 @@ compute_pooled <- function(x, method = c("V1","V2")){ #' @family stats #' @examples #' -#' -#' bb <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb$config) +#' bb <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb$config #' data <- bb$data #' #' res1 <- summarize_stats(data, config) #' pv <- poolvar(res1, config) -#' stopifnot(nrow(pv) == nrow(res1)/5) +#' stopifnot(nrow(pv) == nrow(res1)/3) #' poolvar <- function(res1, config, method = c("V1","V2")){ method <- match.arg(method) @@ -157,31 +156,30 @@ poolvar <- function(res1, config, method = c("V1","V2")){ #' @examples #' #' -#' bb <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb$config) +#' bb <- prolfqua::sim_lfq_data_protein_config() +#' config <- bb$config #' data <- bb$data #' #' res1 <- summarize_stats(data, config) -#' d <- res1 |> dplyr::filter(protein_Id == "CON__P01030~9~NA" & peptide_Id == "AELADQAASWLTR") -#' d <- res1 |> dplyr::filter(protein_Id == "CON__Q3SZR3~50~NA" & peptide_Id == "EHFVDLLLSK") -#' #CON__P02769~18~NA VHKECCHGDLLECADDR -#' d <- res1 |> dplyr::filter(protein_Id == "CON__P02769~18~NA" & peptide_Id == "VHKECCHGDLLECADDR") #' summarize_stats <- function(pdata, config){ pdata <- complete_cases(pdata, config) intsym <- sym(config$table$get_response()) hierarchyFactor <- pdata |> dplyr::group_by(!!!syms( c(config$table$hierarchy_keys(), config$table$factor_keys_depth()) )) |> - dplyr::summarize(n = dplyr::n(), - not_na = sum(!is.na(!!intsym)), + dplyr::summarize(nrReplicates = dplyr::n(), + nrMeasured = sum(!is.na(!!intsym)), + nrNAs = sum(is.na(!!intsym)), sd = stats::sd(!!intsym, na.rm = TRUE), var = stats::var(!!intsym, na.rm = TRUE), - mean = mean(!!intsym, na.rm = TRUE),.groups = "drop_last") |> dplyr::ungroup() + meanAbundance = mean(!!intsym, na.rm = TRUE), + medianAbundance = median(!!intsym, na.rm = TRUE), + .groups = "drop_last") |> dplyr::ungroup() hierarchyFactor <- hierarchyFactor |> dplyr::mutate(dplyr::across(config$table$factor_keys_depth(), as.character)) if (config$table$is_response_transformed == FALSE) { - hierarchyFactor |> dplyr::mutate(CV = sd/mean * 100) -> hierarchyFactor + hierarchyFactor |> dplyr::mutate(CV = sd/meanAbundance * 100) -> hierarchyFactor } return(ungroup(hierarchyFactor)) } @@ -198,32 +196,29 @@ summarize_stats <- function(pdata, config){ #' @examples #' #' -#' bb <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb$config) -#' data <- bb$data +#' bb <- prolfqua::sim_lfq_data_protein_config() #' -#' res1 <- summarize_stats_all(data, config) -#' d <- res1 |> dplyr::filter(protein_Id == "CON__P01030~9~NA" & peptide_Id == "AELADQAASWLTR") -#' d <- res1 |> dplyr::filter(protein_Id == "CON__Q3SZR3~50~NA" & peptide_Id == "EHFVDLLLSK") -#' #CON__P02769~18~NA VHKECCHGDLLECADDR -#' d <- res1 |> dplyr::filter(protein_Id == "CON__P02769~18~NA" & peptide_Id == "VHKECCHGDLLECADDR") -#' res1 |> dplyr::filter(dilution. == "pooled") +#' res1 <- summarize_stats_all(bb$data, bb$config) +#' +#' stopifnot((res1 |> dplyr::filter(group_ == "All") |> nrow()) == (res1 |> nrow())) #' summarize_stats_all <- function(pdata, config){ pdata <- complete_cases(pdata, config) intsym <- sym(config$table$get_response()) hierarchy <- pdata |> dplyr::group_by(!!!syms( config$table$hierarchy_keys() )) |> - dplyr::summarize(n = dplyr::n(), - not_na = sum(!is.na(!!intsym)), + dplyr::summarize(nrReplicates = dplyr::n(), + nrMeasured = sum(!is.na(!!intsym)), sd = sd(!!intsym,na.rm = TRUE), var = sd(!!intsym,na.rm = TRUE), - mean = mean(!!intsym,na.rm = TRUE)) + meanAbundance = mean(!!intsym,na.rm = TRUE), + medianAbundance = median(!!intsym, na.rm = TRUE), + .groups = "drop_last") |> dplyr::ungroup() hierarchy <- dplyr::mutate(hierarchy, !!config$table$factor_keys()[1] := "All") hierarchyFactor <- hierarchy if (config$table$is_response_transformed == FALSE) { - hierarchyFactor |> dplyr::mutate(CV = sd/mean * 100) -> hierarchyFactor + hierarchyFactor |> dplyr::mutate(CV = sd/meanAbundance * 100) -> hierarchyFactor } return(ungroup(hierarchyFactor)) } @@ -239,24 +234,23 @@ summarize_stats_all <- function(pdata, config){ #' @family stats #' @examples #' library(ggplot2) -#' bb1 <- prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data <- bb1$data #' stats_res <- summarize_stats(data, config) -#' summarize_stats_quantiles(stats_res, config) -#' summarize_stats_quantiles(stats_res, config, stats = "CV") -#'stats_res -#' bb <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb$config$clone(deep = TRUE)) +#' sq <- summarize_stats_quantiles(stats_res, config) +#' sq <- summarize_stats_quantiles(stats_res, config, stats = "CV") +#' bb <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb$config #' data <- bb$data #' config$table$get_response() #' stats_res <- summarize_stats(data, config) -#' summarize_stats_quantiles(stats_res, config) -#' summarize_stats_quantiles(stats_res, config, stats = "sd") +#' sq <- summarize_stats_quantiles(stats_res, config) +#' sq <- summarize_stats_quantiles(stats_res, config, stats = "sd") #' #' stats_res <- summarize_stats(data, config) #' xx <- summarize_stats_quantiles(stats_res, config, probs = seq(0,1,by = 0.1)) -#' ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = dilution.)) + geom_line() + geom_point() +#' ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = group_)) + geom_line() + geom_point() #' #' summarize_stats_quantiles <- function(stats_res, @@ -324,10 +318,10 @@ summarize_stats_quantiles <- function(stats_res, #' @examples #' #' -#' #library(ggplot2) #' -#' bb1 <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data2 <- bb1$data #' stats_res <- summarize_stats(data2, config) #' xx <- summarize_stats_quantiles(stats_res, config, probs = c(0.5,0.8)) @@ -336,7 +330,7 @@ summarize_stats_quantiles <- function(stats_res, #' summary <- bbb |> #' dplyr::select( -N_exact, -quantiles, -sdtrimmed ) |> #' tidyr::spread(delta, N, sep = "=") -#' summary +#' lfq_power_t_test_quantiles_V2 <- function(quantile_sd, delta = c(0.59,1,2), @@ -371,14 +365,13 @@ lfq_power_t_test_quantiles_V2 <- #' @family stats #' @examples #' -#' bb1 <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data2 <- bb1$data #' #' res <- lfq_power_t_test_quantiles(data2, config) #' res$summary #' stats_res <- summarize_stats(data2, config) -#' unique(stats_res$dilution.) #' res <- lfq_power_t_test_quantiles(data2, config, delta = 2) #' res <- lfq_power_t_test_quantiles(data2, config, delta = c(0.5,1,2)) #' @@ -434,13 +427,10 @@ lfq_power_t_test_quantiles <- function(pdata, #' @family stats #' @examples #' -#' bb1 <- prolfqua::prolfqua_data('data_IonstarProtein_subsetNorm') -#' +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() #' -#' ldata <- LFQData$new(bb1$data, old2new(bb1$config)) -#' ldata <- ldata$get_sample(20) +#' ldata <- LFQData$new(bb1$data, bb1$config) #' stats_res <- summarize_stats(ldata$data, ldata$config) -#' #' bb <- lfq_power_t_test_proteins(stats_res) #' lfq_power_t_test_proteins <- function(stats_res, @@ -478,16 +468,16 @@ lfq_power_t_test_proteins <- function(stats_res, #' @examples #' #' -#' bb1 <-prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data <- bb1$data #' res <- summarize_stats(data, config) -#' plot_stat_density(res, config, stat = "mean") +#' plot_stat_density(res, config, stat = "meanAbundance") #' plot_stat_density(res, config, stat = "sd") #' plot_stat_density(res, config, stat = "CV") plot_stat_density <- function(pdata, config, - stat = c("CV","mean","sd"), + stat = c("CV","meanAbundance","sd"), ggstat = c("density", "ecdf")){ stat <- match.arg(stat) ggstat <- match.arg(ggstat) @@ -508,18 +498,22 @@ plot_stat_density <- function(pdata, #' #' #' -#' bb1 <- prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data2 <- bb1$data #' res <- summarize_stats(data2, config) -#' plot_stat_density_median(res, config,"CV") -#' plot_stat_density_median(res, config,"mean") -#' plot_stat_density_median(res, config,"sd") -plot_stat_density_median <- function(pdata, config, stat = c("CV","mean","sd"), ggstat = c("density", "ecdf")){ +#' plot_stat_density_median(res, config, "CV") +#' plot_stat_density_median(res, config, "meanAbundance") +#' plot_stat_density_median(res, config, "sd") +plot_stat_density_median <- function( + pdata, + config, + stat = c("CV","meanAbundance","sd"), + ggstat = c("density", "ecdf")){ stat <- match.arg(stat) ggstat <- match.arg(ggstat) pdata <- pdata |> dplyr::filter(!is.na(!!sym(stat))) - res <- pdata |> dplyr::mutate(top = ifelse(mean > median(mean, na.rm = TRUE),"top 50","bottom 50")) -> top50 + res <- pdata |> dplyr::mutate(top = ifelse(meanAbundance > median(meanAbundance, na.rm = TRUE),"top 50","bottom 50")) -> top50 p <- ggplot(top50, aes_string(x = stat, colour = "top")) + geom_line(stat = ggstat) + facet_wrap(config$table$factor_keys()[1]) return(p) @@ -536,16 +530,16 @@ plot_stat_density_median <- function(pdata, config, stat = c("CV","mean","sd"), #' @examples #' #' -#' bb1 <- prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data <- bb1$data #' res <- summarize_stats(data, config) #' res <- summarize_stats(data, config) -#' plot_stat_violin(res, config, stat = "mean") +#' plot_stat_violin(res, config, stat = "meanAbundance") #' plot_stat_violin(res, config, stat = "sd") #' plot_stat_violin(res, config, stat = "CV") #' -plot_stat_violin <- function(pdata, config, stat = c("CV", "mean", "sd")){ +plot_stat_violin <- function(pdata, config, stat = c("CV", "meanAbundance", "sd")){ stat <- match.arg(stat) pdata <- pdata |> tidyr::unite("groups", config$table$factor_keys_depth()) p <- ggplot(pdata, aes_string(x = "groups", y = stat )) + @@ -565,12 +559,12 @@ plot_stat_violin <- function(pdata, config, stat = c("CV", "mean", "sd")){ #' @examples #' #' -#' bb1 <- prolfqua_data('data_ionstar')$normalized() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data <- bb1$data #' res <- summarize_stats(data, config) -#' plot_stat_violin_median(res, config, stat = "mean") -plot_stat_violin_median <- function(pdata, config , stat = c("CV", "mean", "sd")){ +#' plot_stat_violin_median(res, config, stat = "meanAbundance") +plot_stat_violin_median <- function(pdata, config , stat = c("CV", "meanAbundance", "sd")){ median.quartile <- function(x){ out <- quantile(x, probs = c(0.25,0.5,0.75)) names(out) <- c("ymin","y","ymax") @@ -579,7 +573,7 @@ plot_stat_violin_median <- function(pdata, config , stat = c("CV", "mean", "sd") pdata <- pdata |> dplyr::filter(!is.na(!!sym(stat))) res <- pdata |> - dplyr::mutate(top = ifelse(mean > median(mean, na.rm = TRUE),"top 50","bottom 50")) -> + dplyr::mutate(top = ifelse(meanAbundance > median(meanAbundance, na.rm = TRUE),"top 50","bottom 50")) -> top50 p <- ggplot(top50, aes_string(x = config$table$factor_keys()[1], y = stat)) + @@ -602,8 +596,8 @@ plot_stat_violin_median <- function(pdata, config , stat = c("CV", "mean", "sd") #' #' #' -#' bb1 <- prolfqua_data('data_ionstar')$filtered() -#' config <- old2new(bb1$config$clone( deep = TRUE)) +#' bb1 <- prolfqua::sim_lfq_data_peptide_config() +#' config <- bb1$config #' data <- bb1$data #' res <- summarize_stats(data, config) #' @@ -624,11 +618,11 @@ plot_stdv_vs_mean <- function(pdata, config, size=2000){ size <- min(size, min(summary$n)) pdata <- pdata |> - group_by_at(config$table$factor_keys_depth()) |> - sample_n(size = size) |> - ungroup() + dplyr::group_by_at(config$table$factor_keys_depth()) |> + dplyr::sample_n(size = size) |> + dplyr::ungroup() - p <- ggplot(pdata, aes(x = mean, y = sd)) + + p <- ggplot(pdata, aes(x = meanAbundance, y = sd)) + geom_point() + geom_smooth(method = "loess") + facet_wrap(config$table$factor_keys_depth(), nrow = 1) + diff --git a/man/ContrastsMissing.Rd b/man/ContrastsMissing.Rd index 53a306823..4f68b79a3 100644 --- a/man/ContrastsMissing.Rd +++ b/man/ContrastsMissing.Rd @@ -28,6 +28,8 @@ lProt$rename_response("transformedIntensity") Contr <- c("dil.b_vs_a" = "group_A - group_Ctrl") csi <- ContrastsMissing$new(lProt, contrasts = Contr) +csi$get_contrast_sides() + res <- csi$get_contrasts() } diff --git a/man/LFQDataStats.Rd b/man/LFQDataStats.Rd index d57c069f3..d9a58f441 100644 --- a/man/LFQDataStats.Rd +++ b/man/LFQDataStats.Rd @@ -40,7 +40,7 @@ lfqstats <- lfqdata$get_Stats() lfqstats$stats_wide() lfqstats$violin() runallfuncs(lfqstats) -x<-lfqstats +x <- lfqstats #study variance of normalized data diff --git a/man/build_model.Rd b/man/build_model.Rd index 7b02feae9..dccac8459 100644 --- a/man/build_model.Rd +++ b/man/build_model.Rd @@ -32,35 +32,30 @@ a object of class \code{\link{Model}} Build protein models from data } \examples{ -# library(tidyverse) -D <- prolfqua_data('data_ionstar')$normalized() -D$config <- old2new(D$config) -D$data <- dplyr::filter(D$data ,protein_Id \%in\% sample(protein_Id, 100)) - +D <- prolfqua::sim_lfq_data_peptide_config(Nprot = 20, weight_missing = 0.1) +D$data$abundance |> is.na() |> sum() +D <- prolfqua::sim_lfq_data_peptide_config(Nprot = 20, weight_missing = 0.1, seed =3) +D$data$abundance |> is.na() |> sum() modelName <- "f_condtion_r_peptide" formula_randomPeptide <- - strategy_lmer("transformedIntensity ~ dilution. + (1 | peptide_Id) + (1 | sampleName)", + strategy_lmer("abundance ~ group_ + (1 | peptide_Id) + (1 | sampleName)", model_name = modelName) -pepIntensity <- D$data -config <- D$config - - - -mod <- prolfqua:::build_model( - pepIntensity, +mod <- prolfqua::build_model( + D$data, formula_randomPeptide, modelName = modelName, - subject_Id = config$table$hierarchy_keys_depth()) -mod$get_anova() + subject_Id = D$config$table$hierarchy_keys_depth()) +aovtable <- mod$get_anova() -mod <- prolfqua:::build_model( - LFQData$new(pepIntensity, config), +mod <- prolfqua::build_model( + LFQData$new(D$data, D$config), formula_randomPeptide, modelName = modelName) model_summary(mod) + } \seealso{ \code{\link{model_analyse}}, \code{\link{strategy_lmer}} \code{\link{strategy_lm}} diff --git a/man/dot-missigness_impute_interactions.Rd b/man/dot-missigness_impute_interactions.Rd index 34f874c32..00c0543db 100644 --- a/man/dot-missigness_impute_interactions.Rd +++ b/man/dot-missigness_impute_interactions.Rd @@ -35,28 +35,23 @@ used in Acetylation project p2916 } \examples{ -istar <- sim_lfq_data_peptide_config() +istar <- sim_lfq_data_peptide_config(Nprot = 20,weight_missing = 2) config <- istar$config analysis <- istar$data -config$parameter$qVal_individual_threshold <- 0.01 - -xx <- prolfqua::remove_large_QValues(analysis, config) -xx <- complete_cases(xx, config) +xx <- complete_cases(analysis, config) nrPepTimesDilution <- length(unique(paste0(xx$protein_Id, xx$peptide_Id))) * - length(unique(xx$dilution.)) -tmp <- interaction_missing_stats(xx, config) -fun <- .missigness_impute_interactions(xx, config) - -long <- fun("long") -alldata <- fun("all") + length(unique(xx$group_)) +funx <- .missigness_impute_interactions(xx, config) +long <- funx("long") +alldata <- funx("all") stopifnot(length(names(alldata)) == 5) -imputed <- fun("imputed") +imputed <- funx("imputed") stopifnot(nrow(imputed) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) -missing <- fun("nrMeasured") +missing <- funx("nrMeasured") stopifnot(nrow(missing) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) - meanAbundance <- fun("mean") + meanAbundance <- funx("mean") stopifnot(nrow(meanAbundance) == length(unique(paste0(xx$protein_Id, xx$peptide_Id)))) stopifnot(sum(is.na(imputed$mean.imp.group_A))==0) diff --git a/man/interaction_missing_stats.Rd b/man/interaction_missing_stats.Rd index 7afa1e2d5..d510d8726 100644 --- a/man/interaction_missing_stats.Rd +++ b/man/interaction_missing_stats.Rd @@ -38,16 +38,16 @@ xx <- prolfqua::remove_large_QValues(analysis, config) xx <- complete_cases(xx, config) x <- interaction_missing_stats(xx, config)$data |> dplyr::arrange(desc(nrNAs)) - +nrow(x) tmp <- interaction_missing_stats(xx, config, factors= character(), hierarchy = config$table$hierarchy_keys()[1])$data - +stopifnot(nrow(tmp) == 10) tmp <- interaction_missing_stats(xx, config, hierarchy = config$table$hierarchy_keys()[1])$data +stopifnot(nrow(tmp) == length(unique(xx$protein_Id))* length(unique(xx$group_))) stopifnot(sum(is.na(tmp$nrMeasured))==0) tmp <- interaction_missing_stats(xx, config, factors = NULL) - } \keyword{internal} diff --git a/man/lfq_power_t_test_proteins.Rd b/man/lfq_power_t_test_proteins.Rd index 03149b944..8624c398f 100644 --- a/man/lfq_power_t_test_proteins.Rd +++ b/man/lfq_power_t_test_proteins.Rd @@ -28,13 +28,10 @@ Compute theoretical sample sizes from factor level standard deviations } \examples{ -bb1 <- prolfqua::prolfqua_data('data_IonstarProtein_subsetNorm') +bb1 <- prolfqua::sim_lfq_data_peptide_config() - -ldata <- LFQData$new(bb1$data, old2new(bb1$config)) -ldata <- ldata$get_sample(20) +ldata <- LFQData$new(bb1$data, bb1$config) stats_res <- summarize_stats(ldata$data, ldata$config) - bb <- lfq_power_t_test_proteins(stats_res) } diff --git a/man/lfq_power_t_test_quantiles.Rd b/man/lfq_power_t_test_quantiles.Rd index da427401a..82f2037cc 100644 --- a/man/lfq_power_t_test_quantiles.Rd +++ b/man/lfq_power_t_test_quantiles.Rd @@ -29,14 +29,13 @@ Compute theoretical sample sizes from factor level standard deviations } \examples{ -bb1 <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data2 <- bb1$data res <- lfq_power_t_test_quantiles(data2, config) res$summary stats_res <- summarize_stats(data2, config) -unique(stats_res$dilution.) res <- lfq_power_t_test_quantiles(data2, config, delta = 2) res <- lfq_power_t_test_quantiles(data2, config, delta = c(0.5,1,2)) diff --git a/man/lfq_power_t_test_quantiles_V2.Rd b/man/lfq_power_t_test_quantiles_V2.Rd index 99bd5d257..cedd9d164 100644 --- a/man/lfq_power_t_test_quantiles_V2.Rd +++ b/man/lfq_power_t_test_quantiles_V2.Rd @@ -29,10 +29,10 @@ estimate sample sizes \examples{ -#library(ggplot2) -bb1 <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb1$config$clone( deep = TRUE)) + +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data2 <- bb1$data stats_res <- summarize_stats(data2, config) xx <- summarize_stats_quantiles(stats_res, config, probs = c(0.5,0.8)) @@ -41,7 +41,7 @@ bbb <- dplyr::bind_rows(bbb) summary <- bbb |> dplyr::select( -N_exact, -quantiles, -sdtrimmed ) |> tidyr::spread(delta, N, sep = "=") -summary + } \seealso{ Other stats: diff --git a/man/make_interaction_column.Rd b/man/make_interaction_column.Rd index 18810fcbc..e7c32b4ed 100644 --- a/man/make_interaction_column.Rd +++ b/man/make_interaction_column.Rd @@ -14,13 +14,10 @@ xx <- data.frame(A = c("a","a","a"), B = c("d","d","e")) # debug(make_interaction_column) x <- make_interaction_column(xx, c("B","A")) x <- make_interaction_column(xx, c("A")) -bb <- prolfqua_data('data_ionstar')$filtered() -bb$config <- old2new(bb$config) -stopifnot(nrow(bb$data) == 25780) -config <- bb$config$clone(deep=TRUE) +bb <- prolfqua::sim_lfq_data_protein_config() +config <- bb$config analysis <- bb$data -config$table$factor_keys() config$table$factorDepth <- 1 make_interaction_column(analysis, config$table$factor_keys_depth()) diff --git a/man/missigness_impute_factors_interactions.Rd b/man/missigness_impute_factors_interactions.Rd index f09db6fc2..1e3c88913 100644 --- a/man/missigness_impute_factors_interactions.Rd +++ b/man/missigness_impute_factors_interactions.Rd @@ -31,17 +31,18 @@ should generalize at some stage } \examples{ -istar <- sim_lfq_data_peptide_config() +istar <- sim_lfq_data_peptide_config(weight_missing = 2) config <- istar$config analysis <- istar$data xx <- complete_cases(analysis, config) - res <- missigness_impute_factors_interactions(xx, config) res <- missigness_impute_factors_interactions(xx, config, value = "imputed") res <- missigness_impute_factors_interactions(xx, config, value = "nrMeasured") +#debug(missigness_impute_factors_interactions) long <- missigness_impute_factors_interactions(xx, config, value = "long") - +head(long) +plot(long$meanAbundance, long$imputed) } \seealso{ Other imputation: diff --git a/man/plot_hierarchies_line_df.Rd b/man/plot_hierarchies_line_df.Rd index a3ecaad2d..b32236300 100644 --- a/man/plot_hierarchies_line_df.Rd +++ b/man/plot_hierarchies_line_df.Rd @@ -25,22 +25,18 @@ config <- old2new(istar$config) config$table$is_response_transformed <- FALSE #debug(plot_hierarchies_line_df) res <- plot_hierarchies_line_df(istar_data, config) -res[[1]] config$table$is_response_transformed <- TRUE res <- plot_hierarchies_line_df(istar_data, config) -res[[1]] istar <- prolfqua_data('data_ionstar')$filtered() istar_data <- istar$data |> dplyr::filter(protein_Id \%in\% sample(protein_Id, 20)) config <- old2new(istar$config) res <- plot_hierarchies_line_df(istar_data, config) config$table$is_response_transformed -res[[1]] config$table$is_response_transformed <- TRUE res <- plot_hierarchies_line_df(istar_data, config) config$table$is_response_transformed -res[[1]] #TODO make it work for other hiearachy levels. config$table$hierarchyDepth = 2 diff --git a/man/plot_stat_density.Rd b/man/plot_stat_density.Rd index be6541d20..33a415c41 100644 --- a/man/plot_stat_density.Rd +++ b/man/plot_stat_density.Rd @@ -7,7 +7,7 @@ plot_stat_density( pdata, config, - stat = c("CV", "mean", "sd"), + stat = c("CV", "meanAbundance", "sd"), ggstat = c("density", "ecdf") ) } @@ -26,11 +26,11 @@ plot density distribution or ecdf of sd, mean or CV \examples{ -bb1 <-prolfqua_data('data_ionstar')$filtered() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data <- bb1$data res <- summarize_stats(data, config) -plot_stat_density(res, config, stat = "mean") +plot_stat_density(res, config, stat = "meanAbundance") plot_stat_density(res, config, stat = "sd") plot_stat_density(res, config, stat = "CV") } diff --git a/man/plot_stat_density_median.Rd b/man/plot_stat_density_median.Rd index 466acf766..b71ce6bb0 100644 --- a/man/plot_stat_density_median.Rd +++ b/man/plot_stat_density_median.Rd @@ -7,7 +7,7 @@ plot_stat_density_median( pdata, config, - stat = c("CV", "mean", "sd"), + stat = c("CV", "meanAbundance", "sd"), ggstat = c("density", "ecdf") ) } @@ -27,13 +27,13 @@ plot density distribution or ecdf of sd, mean or cv given intensity below and ab -bb1 <- prolfqua_data('data_ionstar')$filtered() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data2 <- bb1$data res <- summarize_stats(data2, config) -plot_stat_density_median(res, config,"CV") -plot_stat_density_median(res, config,"mean") -plot_stat_density_median(res, config,"sd") +plot_stat_density_median(res, config, "CV") +plot_stat_density_median(res, config, "meanAbundance") +plot_stat_density_median(res, config, "sd") } \seealso{ Other stats: diff --git a/man/plot_stat_violin.Rd b/man/plot_stat_violin.Rd index c18e3b686..c02ebb279 100644 --- a/man/plot_stat_violin.Rd +++ b/man/plot_stat_violin.Rd @@ -4,7 +4,7 @@ \alias{plot_stat_violin} \title{plot Violin plot of sd CV or mean} \usage{ -plot_stat_violin(pdata, config, stat = c("CV", "mean", "sd")) +plot_stat_violin(pdata, config, stat = c("CV", "meanAbundance", "sd")) } \arguments{ \item{pdata}{data.frame} @@ -19,12 +19,12 @@ plot Violin plot of sd CV or mean \examples{ -bb1 <- prolfqua_data('data_ionstar')$filtered() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data <- bb1$data res <- summarize_stats(data, config) res <- summarize_stats(data, config) -plot_stat_violin(res, config, stat = "mean") +plot_stat_violin(res, config, stat = "meanAbundance") plot_stat_violin(res, config, stat = "sd") plot_stat_violin(res, config, stat = "CV") diff --git a/man/plot_stat_violin_median.Rd b/man/plot_stat_violin_median.Rd index 0f03fefd4..095a8bab5 100644 --- a/man/plot_stat_violin_median.Rd +++ b/man/plot_stat_violin_median.Rd @@ -4,7 +4,7 @@ \alias{plot_stat_violin_median} \title{plot Violin plot of sd CV or mean given intensity lower or above median} \usage{ -plot_stat_violin_median(pdata, config, stat = c("CV", "mean", "sd")) +plot_stat_violin_median(pdata, config, stat = c("CV", "meanAbundance", "sd")) } \arguments{ \item{pdata}{data.frame} @@ -19,11 +19,11 @@ plot Violin plot of sd CV or mean given intensity lower or above median \examples{ -bb1 <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data <- bb1$data res <- summarize_stats(data, config) -plot_stat_violin_median(res, config, stat = "mean") +plot_stat_violin_median(res, config, stat = "meanAbundance") } \seealso{ Other stats: diff --git a/man/plot_stdv_vs_mean.Rd b/man/plot_stdv_vs_mean.Rd index 3161ad4f1..e14edffd5 100644 --- a/man/plot_stdv_vs_mean.Rd +++ b/man/plot_stdv_vs_mean.Rd @@ -20,8 +20,8 @@ plot stddev vs mean to asses stability of variance -bb1 <- prolfqua_data('data_ionstar')$filtered() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data <- bb1$data res <- summarize_stats(data, config) diff --git a/man/pooled_var.Rd b/man/pooled_var.Rd index e30f4a6a8..ec18f8df9 100644 --- a/man/pooled_var.Rd +++ b/man/pooled_var.Rd @@ -27,33 +27,32 @@ https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.1/7.3.1.1 } \examples{ -x <- data.frame(not_na =c(1,2,2), var = c(3,4,4), mean = c(3,3,3)) -x <- data.frame(not_na =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), mean = c(-1.94,-1.46,-1.87,-1.45) ) +x <- data.frame(nrMeasured =c(1,2,2), var = c(3,4,4), meanAbundance = c(3,3,3)) +x <- data.frame(nrMeasured =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), meanAbundance = c(-1.94,-1.46,-1.87,-1.45) ) prolfqua:::pooled_V2(na.omit(x)) prolfqua:::pooled_V1(na.omit(x)) x <- x[1,, drop=FALSE] x na.omit(x) prolfqua:::pooled_V2(na.omit(x)) -x <- data.frame(not_na =c(1,2,2), var = c(3,4,4), mean = c(3,3,3)) -x <- data.frame(not_na =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), mean = c(-1.94,-1.46,-1.87,-1.45) ) +x <- data.frame(nrMeasured =c(1,2,2), var = c(3,4,4), meanAbundance = c(3,3,3)) +x <- data.frame(nrMeasured =c(1,2,1,1), var = c(NA, 0.0370, NA, NA), meanAbundance = c(-1.94,-1.46,-1.87,-1.45) ) compute_pooled(x) compute_pooled(x, method = "V2") #debug(compute_pooled) y <- data.frame(dilution.=c("a","b","c"), - n = c(4,4,4), not_na = c(0,0,1), sd =c(NA,NA,NA), - var = c(NA,NA,NA),mean = c(NaN,NaN,NaN)) + nrReplicates = c(4,4,4), nrMeasured = c(0,0,1), sd =c(NA,NA,NA), + var = c(NA,NA,NA),meanAbundance = c(NaN,NaN,NaN)) compute_pooled(y) -yb <- y |> dplyr::filter(not_na > 1) +yb <- y |> dplyr::filter(nrMeasured > 1) - -bb <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb$config) +bb <- prolfqua::sim_lfq_data_peptide_config() +config <- bb$config data <- bb$data res1 <- summarize_stats(data, config) pv <- poolvar(res1, config) -stopifnot(nrow(pv) == nrow(res1)/5) +stopifnot(nrow(pv) == nrow(res1)/3) } \seealso{ diff --git a/man/sim_lfq_data_peptide_config.Rd b/man/sim_lfq_data_peptide_config.Rd index f9d8abf3e..11512039c 100644 --- a/man/sim_lfq_data_peptide_config.Rd +++ b/man/sim_lfq_data_peptide_config.Rd @@ -4,7 +4,12 @@ \alias{sim_lfq_data_peptide_config} \title{Simulate data, protein and peptide, with config} \usage{ -sim_lfq_data_peptide_config(Nprot = 10, with_missing = TRUE, seed = 1234) +sim_lfq_data_peptide_config( + Nprot = 10, + with_missing = TRUE, + weight_missing = 0.2, + seed = 1234 +) } \arguments{ \item{with_missing}{add missing values, default TRUE} diff --git a/man/sim_lfq_data_protein_config.Rd b/man/sim_lfq_data_protein_config.Rd index 721acf620..a12b6a310 100644 --- a/man/sim_lfq_data_protein_config.Rd +++ b/man/sim_lfq_data_protein_config.Rd @@ -4,7 +4,12 @@ \alias{sim_lfq_data_protein_config} \title{Simulate data, protein, with config} \usage{ -sim_lfq_data_protein_config(Nprot = 10, with_missing = TRUE, seed = 1234) +sim_lfq_data_protein_config( + Nprot = 10, + with_missing = TRUE, + weight_missing = 0.2, + seed = 1234 +) } \arguments{ \item{with_missing}{add missing values, default TRUE} diff --git a/man/summarize_stats.Rd b/man/summarize_stats.Rd index e63e396e0..06bd61f58 100644 --- a/man/summarize_stats.Rd +++ b/man/summarize_stats.Rd @@ -40,48 +40,38 @@ summarize stats output (compute quantiles) \examples{ -bb <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb$config) +bb <- prolfqua::sim_lfq_data_protein_config() +config <- bb$config data <- bb$data res1 <- summarize_stats(data, config) -d <- res1 |> dplyr::filter(protein_Id == "CON__P01030~9~NA" & peptide_Id == "AELADQAASWLTR") -d <- res1 |> dplyr::filter(protein_Id == "CON__Q3SZR3~50~NA" & peptide_Id == "EHFVDLLLSK") -#CON__P02769~18~NA VHKECCHGDLLECADDR -d <- res1 |> dplyr::filter(protein_Id == "CON__P02769~18~NA" & peptide_Id == "VHKECCHGDLLECADDR") -bb <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb$config) -data <- bb$data +bb <- prolfqua::sim_lfq_data_protein_config() + +res1 <- summarize_stats_all(bb$data, bb$config) -res1 <- summarize_stats_all(data, config) -d <- res1 |> dplyr::filter(protein_Id == "CON__P01030~9~NA" & peptide_Id == "AELADQAASWLTR") -d <- res1 |> dplyr::filter(protein_Id == "CON__Q3SZR3~50~NA" & peptide_Id == "EHFVDLLLSK") -#CON__P02769~18~NA VHKECCHGDLLECADDR -d <- res1 |> dplyr::filter(protein_Id == "CON__P02769~18~NA" & peptide_Id == "VHKECCHGDLLECADDR") -res1 |> dplyr::filter(dilution. == "pooled") +stopifnot((res1 |> dplyr::filter(group_ == "All") |> nrow()) == (res1 |> nrow())) library(ggplot2) -bb1 <- prolfqua_data('data_ionstar')$filtered() -config <- old2new(bb1$config$clone( deep = TRUE)) +bb1 <- prolfqua::sim_lfq_data_peptide_config() +config <- bb1$config data <- bb1$data stats_res <- summarize_stats(data, config) -summarize_stats_quantiles(stats_res, config) -summarize_stats_quantiles(stats_res, config, stats = "CV") -stats_res -bb <- prolfqua_data('data_ionstar')$normalized() -config <- old2new(bb$config$clone(deep = TRUE)) +sq <- summarize_stats_quantiles(stats_res, config) +sq <- summarize_stats_quantiles(stats_res, config, stats = "CV") +bb <- prolfqua::sim_lfq_data_peptide_config() +config <- bb$config data <- bb$data config$table$get_response() stats_res <- summarize_stats(data, config) -summarize_stats_quantiles(stats_res, config) -summarize_stats_quantiles(stats_res, config, stats = "sd") +sq <- summarize_stats_quantiles(stats_res, config) +sq <- summarize_stats_quantiles(stats_res, config, stats = "sd") stats_res <- summarize_stats(data, config) xx <- summarize_stats_quantiles(stats_res, config, probs = seq(0,1,by = 0.1)) -ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = dilution.)) + geom_line() + geom_point() +ggplot2::ggplot(xx$long, aes(x = probs, y = quantiles, color = group_)) + geom_line() + geom_point() } diff --git a/man/which_missing.Rd b/man/which_missing.Rd index 24b1053e6..9175dc9a3 100644 --- a/man/which_missing.Rd +++ b/man/which_missing.Rd @@ -4,10 +4,12 @@ \alias{which_missing} \title{add missing values to x vector based on the values of x} \usage{ -which_missing(x) +which_missing(x, weight_missing = 0.2) } \arguments{ \item{x}{vector of intensities} + +\item{weight_missing}{greater weight more missing} } \description{ add missing values to x vector based on the values of x