diff --git a/NAMESPACE b/NAMESPACE index c317e6891..8707a4fe9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -102,7 +102,7 @@ export(names_to_matrix) export(normalize_log2_robscale) export(nr_B_in_A) export(nr_B_in_A_per_sample) -export(nr_obs_hierarchy) +export(nr_obs_experiment) export(nr_obs_sample) export(old2new) export(pairs_smooth) diff --git a/R/AnalysisConfiguration.R b/R/AnalysisConfiguration.R index f5c9b78bc..a9614c461 100644 --- a/R/AnalysisConfiguration.R +++ b/R/AnalysisConfiguration.R @@ -396,15 +396,16 @@ hierarchy_counts <- function(pdata, config){ #' #' config <- bb$config #' data <- bb$data -#' res <- hierarchy_counts_sample(data, config) +#' res <- hierarchy_counts_sample(data, config, nr_children = 1) #' res() #' x <- res("long") #' res("plot") -#' +#' # filters on peptide level #' res <- hierarchy_counts_sample(data, config, nr_children = 2) #' res() #' x2 <- res("long") #' res("plot") +#' # filters on protein level based on peptide count #' bb <- prolfqua::sim_lfq_data_protein_config() #' res <- hierarchy_counts_sample(bb$data, bb$config, nr_children = 2) #' x1 <- res() @@ -414,15 +415,23 @@ hierarchy_counts <- function(pdata, config){ #' x2 <- res() #' res("long") #' res("plot") -#' dplyr::inner_join(x1, -#' x2, -#' by = c("isotopeLabel","sampleName"), -#' suffix =c("2","1")) +#' x1$nr_children <- 2 +#' x2$nr_children <- 1 +#' xl <- dplyr::bind_rows(x1, x2) +#' +#' +#' +#' +#' xl$nr_children |> table() +#' nudgeval <- -mean(xl$protein_Id) * 0.05 +#' ggplot(xl, aes(x = sampleName, y = protein_Id, fill = as.character(nr_children)) ) + +#' geom_bar(stat = "identity", position = position_dodge()) #' hierarchy_counts_sample <- function( pdata, configuration, nr_children = 1) { + hierarchy <- configuration$table$hierarchy_keys() summary <- pdata |> dplyr::filter(!is.na(!!rlang::sym(configuration$table$get_response() )), !!rlang::sym(configuration$table$nr_children ) >= .env$nr_children) |> @@ -440,7 +449,6 @@ hierarchy_counts_sample <- function( if 
(value == "long") { return(long) }else if (value == "plot" & nrow(long) > 0) { - nudgeval <- -mean(long$nr) * 0.05 # TODO(WEW) check potential problem with sampleName ggplot2::ggplot(long, ggplot2::aes(x = !!rlang::sym(configuration$table$sampleName), y = .data$nr)) + diff --git a/R/Contrasts.R b/R/Contrasts.R index 606d430e4..39a01d71e 100644 --- a/R/Contrasts.R +++ b/R/Contrasts.R @@ -49,6 +49,8 @@ Contrasts <- R6::R6Class( contrast_result = NULL, #' @field global use a global linear function (determined by get_linfct) global = TRUE, + #' @field protein_annot holds protein annotation + protein_annot = NULL, #' @description #' initialize #' create Contrast diff --git a/R/ContrastsPlotter.R b/R/ContrastsPlotter.R index ab9db7e1b..910be0203 100644 --- a/R/ContrastsPlotter.R +++ b/R/ContrastsPlotter.R @@ -76,6 +76,8 @@ ContrastsPlotter <- R6::R6Class( fcthresh = 1, #' @field avg.abundance name of column containing avg abundance values. avg.abundance = character(), + #' @field protein_annot holds protein annotation + protein_annot = NULL, #' @description #' create Crontrast_Plotter #' @param contrastDF frame with contrast data @@ -98,7 +100,8 @@ ContrastsPlotter <- R6::R6Class( modelName = "modelName", diff = "diff", contrast = "contrast", - avg.abundance = "avgAbd" + avg.abundance = "avgAbd", + protein_annot = NULL ){ self$contrastDF <- tidyr::unite( contrastDF, @@ -113,6 +116,7 @@ ContrastsPlotter <- R6::R6Class( self$fcthresh = fcthresh self$contrast = contrast self$avg.abundance = avg.abundance + self$protein_annot = protein_annot }, #' @description #' plot histogram of selected scores (e.g. 
p-value, FDR, t-statistics) diff --git a/R/ProteinAnnotation.R b/R/ProteinAnnotation.R index 237edac7a..7c7de0a98 100644 --- a/R/ProteinAnnotation.R +++ b/R/ProteinAnnotation.R @@ -6,16 +6,13 @@ #' @examples #' #' istar <-prolfqua::sim_lfq_data_peptide_config() -#' data <- istar$data -#' -#' -#' lfqdata <- LFQData$new(data, istar$config) +#' lfqdata <- LFQData$new(istar$data, istar$config) #' pannot <- ProteinAnnotation$new( lfqdata ) #' pannot$annotate_decoys() #' pannot$annotate_contaminants() #' dd <- pannot$clean() #' tmp <- lfqdata$get_subset(dd) -#' +#' pannot$row_annot #' ProteinAnnotation <- R6::R6Class("ProteinAnnotation", @@ -49,8 +46,12 @@ ProteinAnnotation <- } else { self$row_annot <- distinct(select(lfqdata$data, self$pID)) } - - + if (!self$nr_peptides %in% colnames(row_annot) ) { + self$row_annot <- inner_join( + self$row_annot, + nr_obs_experiment(lfqdata$data, lfqdata$config, name_nr_child = self$nr_peptides), + by = self$pID) + } }, #' @description #' annotate rev sequences diff --git a/R/tidyMS_R6_TransitionCorrelations.R b/R/tidyMS_R6_TransitionCorrelations.R index ceb3964dd..9db5acd02 100644 --- a/R/tidyMS_R6_TransitionCorrelations.R +++ b/R/tidyMS_R6_TransitionCorrelations.R @@ -709,28 +709,37 @@ nr_obs_sample <- function(data, config, new_child = config$table$nr_children){ #' Aggregates e.g. 
protein abundances from peptide abundances #' #' @export +#' @param data tidy data +#' @param config prolfqua config +#' @param from_children compute from existing child stats +#' @param name_nr_child how to name column #' @examples #' dd <- prolfqua::sim_lfq_data_peptide_config() #' -#' xd <- nr_obs_hierarchy(dd$data, dd$config) +#' xd <- nr_obs_experiment(dd$data, dd$config) +#' xd +#' xd <- nr_obs_experiment(dd$data, dd$config, from_children = FALSE) #' xd -#' dp <- prolfqua::sim_lfq_data_protein_config() -#' debug(nr_obs_hierarchy) -#' nr_obs_sample(dp$data, dp$config) -#' xd <- nr_obs_hierarchy(dp$data, dp$config) #' +#' dp <- prolfqua::sim_lfq_data_protein_config() +#' undebug(nr_obs_experiment) +#' nr_obs_experiment(dp$data, dp$config) +#' nr_obs_experiment(dp$data, dp$config, from_children = FALSE) #' -nr_obs_hierarchy <- function(data, config, from_children = TRUE , name_nr_child = "nr_child_exp"){ +nr_obs_experiment <- function(data, config, from_children = TRUE, + name_nr_child = "nr_child_exp"){ tb <- config$table if (!from_children & (tb$hierarchyDepth < length(tb$hierarchy_keys())) ) { - xq <- data |> tidyr::select(tb$hierarchy_keys()) |> + xq <- data |> dplyr::select(tb$hierarchy_keys()) |> distinct() |> dplyr::group_by(!!sym(tb$hierarchy_keys_depth())) |> dplyr::summarize(!!name_nr_child := dplyr::n(), .groups = "drop") + return(xq) } else { xz <- nr_obs_sample(data,config) - xz <- x |> group_by(!!sym(tb$hierarchy_keys_depth())) |> + xz <- xz |> group_by(!!sym(tb$hierarchy_keys_depth())) |> summarize(!!name_nr_child := max(!!sym(tb$nr_children)), .groups = "drop") + return(xz) } } diff --git a/man/Contrasts.Rd b/man/Contrasts.Rd index bbfcc54ee..e1519c311 100644 --- a/man/Contrasts.Rd +++ b/man/Contrasts.Rd @@ -97,6 +97,8 @@ Other modelling: \item{\code{contrast_result}}{data frame containing results of contrast computation} \item{\code{global}}{use a global linear function (determined by get_linfct)} + +\item{\code{protein_annot}}{holds protein 
annotation} } \if{html}{\out{}} } diff --git a/man/ContrastsPlotter.Rd b/man/ContrastsPlotter.Rd index 866dffe5a..7377992b1 100644 --- a/man/ContrastsPlotter.Rd +++ b/man/ContrastsPlotter.Rd @@ -143,6 +143,8 @@ Other plotting: \item{\code{fcthresh}}{fold change threshold} +\item{\code{protein_annot}}{holds protein annotation} + \item{\code{avg.abundance}}{name of column containing avg abundance values.} } \if{html}{\out{}} @@ -181,7 +183,8 @@ create Crontrast_Plotter modelName = "modelName", diff = "diff", contrast = "contrast", - avg.abundance = "avgAbd" + avg.abundance = "avgAbd", + protein_annot = NULL )}\if{html}{\out{}} } diff --git a/man/ProteinAnnotation.Rd b/man/ProteinAnnotation.Rd index d0230aa22..a28d38618 100644 --- a/man/ProteinAnnotation.Rd +++ b/man/ProteinAnnotation.Rd @@ -11,16 +11,13 @@ Decorates LFQData with a row annotation and some protein specific functions. \examples{ istar <-prolfqua::sim_lfq_data_peptide_config() -data <- istar$data - - -lfqdata <- LFQData$new(data, istar$config) +lfqdata <- LFQData$new(istar$data, istar$config) pannot <- ProteinAnnotation$new( lfqdata ) pannot$annotate_decoys() pannot$annotate_contaminants() dd <- pannot$clean() tmp <- lfqdata$get_subset(dd) - +pannot$row_annot } \seealso{ diff --git a/man/hierarchy_counts_sample.Rd b/man/hierarchy_counts_sample.Rd index 0266a4d6c..cba0af31e 100644 --- a/man/hierarchy_counts_sample.Rd +++ b/man/hierarchy_counts_sample.Rd @@ -20,15 +20,16 @@ bb <- prolfqua::sim_lfq_data_peptide_config() config <- bb$config data <- bb$data -res <- hierarchy_counts_sample(data, config) +res <- hierarchy_counts_sample(data, config, nr_children = 1) res() x <- res("long") res("plot") - +# filters on peptide level res <- hierarchy_counts_sample(data, config, nr_children = 2) res() x2 <- res("long") res("plot") +# filters on protein level based on peptide count bb <- prolfqua::sim_lfq_data_protein_config() res <- hierarchy_counts_sample(bb$data, bb$config, nr_children = 2) 
x1 <- res() @@ -38,10 +39,17 @@ res <- hierarchy_counts_sample(bb$data, bb$config, nr_children = 1) x2 <- res() res("long") res("plot") -dplyr::inner_join(x1, - x2, - by = c("isotopeLabel","sampleName"), - suffix =c("2","1")) +x1$nr_children <- 2 +x2$nr_children <- 1 +xl <- dplyr::bind_rows(x1, x2) + + + + +xl$nr_children |> table() +nudgeval <- -mean(xl$protein_Id) * 0.05 +ggplot(xl, aes(x = sampleName, y = protein_Id, fill = as.character(nr_children)) ) + + geom_bar(stat = "identity", position = position_dodge()) } \seealso{ diff --git a/man/nr_obs_experiment.Rd b/man/nr_obs_experiment.Rd new file mode 100644 index 000000000..a0d78d7d5 --- /dev/null +++ b/man/nr_obs_experiment.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyMS_R6_TransitionCorrelations.R +\name{nr_obs_experiment} +\alias{nr_obs_experiment} +\title{Aggregates e.g. protein abundances from peptide abundances} +\usage{ +nr_obs_experiment( + data, + config, + from_children = TRUE, + name_nr_child = "nr_child_exp" +) +} +\arguments{ +\item{data}{tidy data} + +\item{config}{prolfqua config} + +\item{from_children}{compute from existing child stats} + +\item{name_nr_child}{how to name column} +} +\description{ +Aggregates e.g. 
protein abundances from peptide abundances +} +\examples{ +dd <- prolfqua::sim_lfq_data_peptide_config() + +xd <- nr_obs_experiment(dd$data, dd$config) +xd +xd <- nr_obs_experiment(dd$data, dd$config, from_children = FALSE) +xd + +dp <- prolfqua::sim_lfq_data_protein_config() +undebug(nr_obs_experiment) +nr_obs_experiment(dp$data, dp$config) +nr_obs_experiment(dp$data, dp$config, from_children = FALSE) + +} diff --git a/man/nr_obs_hierarchy.Rd b/man/nr_obs_hierarchy.Rd deleted file mode 100644 index 78abcfc23..000000000 --- a/man/nr_obs_hierarchy.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyMS_R6_TransitionCorrelations.R -\name{nr_obs_hierarchy} -\alias{nr_obs_hierarchy} -\title{Aggregates e.g. protein abundances from peptide abundances} -\usage{ -nr_obs_hierarchy( - data, - config, - from_children = TRUE, - name_nr_child = "nr_child_exp" -) -} -\description{ -Aggregates e.g. protein abundances from peptide abundances -} -\examples{ -dd <- prolfqua::sim_lfq_data_peptide_config() - -xd <- nr_obs_hierarchy(dd$data, dd$config) -xd -dp <- prolfqua::sim_lfq_data_protein_config() -debug(nr_obs_hierarchy) -nr_obs_sample(dp$data, dp$config) -xd <- nr_obs_hierarchy(dp$data, dp$config) - - -}