Skip to content

Commit

Permalink
adding nr obs aggregation to aggregator
Browse files Browse the repository at this point in the history
  • Loading branch information
wolski committed Apr 5, 2024
1 parent 6177bc6 commit 7a7dfb3
Show file tree
Hide file tree
Showing 15 changed files with 143 additions and 66 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ export(ProteinAnnotation)
export(R6_extract_values)
export(UpSet_interaction_missing_stats)
export(UpSet_missing_stats)
export(add_missing)
export(adjust_p_values)
export(aggregate_contrast)
export(aggregate_intensity_topN)
Expand Down Expand Up @@ -103,6 +102,7 @@ export(names_to_matrix)
export(normalize_log2_robscale)
export(nr_B_in_A)
export(nr_B_in_A_per_sample)
export(nr_obs)
export(old2new)
export(pairs_smooth)
export(pairs_w_abline)
Expand Down Expand Up @@ -174,6 +174,7 @@ export(tidy_to_wide)
export(tidy_to_wide_config)
export(transform_work_intensity)
export(volcano_plotly)
export(which_missing)
import(dplyr)
import(ggplot2)
importFrom(MASS,rlm)
Expand Down
7 changes: 3 additions & 4 deletions R/AnalysisConfiguration.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ R6_extract_values <- function(r6class){
setup_analysis <- function(data, configuration, cc = TRUE, from_factors = FALSE){
configuration <- configuration$clone(deep = TRUE)
table <- configuration$table
if (is.null(table$fileName)) { stop("fileName column is not specified in configuration.")}
if (!table$fileName %in% colnames(data)) { stop("File name column :" , table$fileName , ", is missing in data.")}

# extract hierarchy columns
for (i in seq_along(table$hierarchy))
Expand Down Expand Up @@ -194,7 +196,7 @@ setup_analysis <- function(data, configuration, cc = TRUE, from_factors = FALSE

txd <- data |> group_by(!!!syms(c(table$fileName, table$hierarchy_keys(), table$isotopeLabel))) |>
summarize(n = n())
if (length(table(txd$n)) > 1) {
if (any(txd$n > 1)) {
str <- paste("There is more than ONE observations for each : ", paste( table$hierarchy_keys(), collapse = ", "), ",\n",
"and sample : ", table$sampleName, "; (filename) : ", table$fileName, "\n")
warning(str)
Expand All @@ -204,12 +206,9 @@ setup_analysis <- function(data, configuration, cc = TRUE, from_factors = FALSE
#tmp <- prolfqua::tidy_to_wide_config(data, configuration)
#message("nr rows and nr columns")
#message(paste(dim(tmp$data),collapse = ", "))

if (cc) {
data <- complete_cases( data , configuration)
}


return( data )
}

Expand Down
6 changes: 4 additions & 2 deletions R/LFQDataAggregator.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#' \dontrun{
#' lfqAggregator$write_plots(tempdir())
#' }
#'
LFQDataAggregator <- R6::R6Class(
"LFQDataAggregator",
public = list(
Expand Down Expand Up @@ -75,8 +76,8 @@ LFQDataAggregator <- R6::R6Class(
if (!self$lfq$is_transformed()) {
warning("You did not transform the intensities.",
"medpolish works best with already variance stabilized intensities.",
"Use LFQData$get_Transformer to transform the data.",
self$lfq$config$table$workIntensity,)
"Use LFQData$get_Transformer to transform the data :",
self$lfq$config$table$workIntensity)
}
res <- estimate_intensity(self$lfq$data, self$lfq$config, .func = medpolish_estimate_dfconfig)
self$lfq_agg <- LFQData$new(res$data, res$config, prefix = self$prefix)
Expand All @@ -95,6 +96,7 @@ LFQDataAggregator <- R6::R6Class(
}

res <- estimate_intensity(self$lfq$data, self$lfq$config, .func = rlm_estimate_dfconfig)
res <-
self$lfq_agg <- LFQData$new(res$data, res$config, prefix = self$prefix)
invisible(self$lfq_agg)
},
Expand Down
17 changes: 10 additions & 7 deletions R/simulate_LFQ_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ sim_lfq_data <- function(
#' @param x vector of intensities
#'
#'
add_missing <- function(x){
which_missing <- function(x){
missing_prop <- pnorm(x, mean = mean(x), sd = sd(x))
# sample TRUE or FALSE with probability given in missing_prop
samplemiss <- function(missing_prop) {
Expand All @@ -136,8 +136,8 @@ add_missing <- function(x){

missing_values <- sapply(missing_prop, samplemiss)
# Introduce missing values into the vector x
x[missing_values] <- NA
return(x)
#x[missing_values] <- NA
return(missing_values)
}


Expand All @@ -147,6 +147,7 @@ add_missing <- function(x){
#' @param seed seed for reproducibility, if NULL no seed is set.
#' @export
#' @examples
#' undebug(sim_lfq_data_peptide_config)
#' x <- sim_lfq_data_peptide_config()
#' stopifnot("data.frame" %in% class(x$data))
#' stopifnot("AnalysisConfiguration" %in% class(x$config))
Expand All @@ -156,13 +157,15 @@ sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE, seed =
}
data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = TRUE)
if (with_missing) {
data$abundance <- add_missing(data$abundance)
not_missing <- !which_missing(data$abundance)
data <- data[not_missing,]
}
data$isotopeLabel <- "light"
data$qValue <- 0

atable <- AnalysisTableAnnotation$new()
atable$sampleName = "sample"
atable$fileName = "sample"

atable$factors["group_"] = "group"
atable$hierarchy[["protein_Id"]] = c("proteinID", "idtype2")
atable$hierarchy[["peptide_Id"]] = "peptideID"
Expand All @@ -188,13 +191,13 @@ sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE, seed =
}
data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = FALSE)
if (with_missing) {
data$abundance <- add_missing(data$abundance)
data <- data[!which_missing(data$abundance),]
}
data$isotopeLabel <- "light"
data$qValue <- 0

atable <- AnalysisTableAnnotation$new()
atable$sampleName = "sample"
atable$fileName = "sample"
atable$nr_children = "nr_peptides"
atable$factors["group_"] = "group"
atable$hierarchy[["protein_Id"]] = c("proteinID", "idtype2")
Expand Down
56 changes: 36 additions & 20 deletions R/tidyMS_R6_TransitionCorrelations.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,20 +219,38 @@ tidy_to_wide <- function(data,
#' @return list with data, rowdata, and annotation (colData)
#' @examples
#'
#' dd <- prolfqua_data('data_spectronautDIA250_A')
#' config <- dd$config_f()
#' analysis <- dd$analysis(dd$data,config)
#' res <- tidy_to_wide_config(analysis, config)
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' config <- dd$config
#' data <- dd$data
#' res <- tidy_to_wide_config(data, config)
#' testthat::expect_equal(nrow(res$rowdata), nrow(res$data))
#' testthat::expect_equal(ncol(res$data) - ncol(res$rowdata) , nrow(res$annotation))
#' res <- tidy_to_wide_config(analysis, config, as.matrix = TRUE)
#' dim(res$data) == c(823, 45)
#' dim(res$annotation) == c(45, 6)
#' dim(res$rowdata) == c(823, 4)
#' res <- tidy_to_wide_config(data, config, as.matrix = TRUE)
#' dim(res$data) == c(28, 12)
#' dim(res$annotation) == c(12, 3)
#' dim(res$rowdata) == c(28, 3)
#'
#' res <- scale(res$data)
#'
tidy_to_wide_config <- function(data, config, as.matrix = FALSE, fileName = FALSE, sep="~lfq~"){
#' tidy_to_wide_config(data, config, value = config$table$nr_children)
#'
#'
#' xt <- prolfqua::LFQData$new(dd$data, dd$config)
#' xt$data$nr_children
#' #xt$config$table$is_response_transformed <- TRUE
#' res <- xt$get_Aggregator()
#' x <- res$medpolish()
#' dd <- prolfqua::sim_lfq_data_protein_config()
#' dd$config$table$nr_children
#' dd$data
#' xt <- tidy_to_wide_config(dd$data, dd$config, value = dd$config$table$nr_children)
#' xt$data
#'
tidy_to_wide_config <- function(data, config,
as.matrix = FALSE,
fileName = FALSE,
sep="~lfq~",
value = config$table$get_response()
){
if (fileName) {
newcolname <- config$table$fileName
}else{
Expand All @@ -245,7 +263,7 @@ tidy_to_wide_config <- function(data, config, as.matrix = FALSE, fileName = FALS

res <- tidy_to_wide( data, c(config$table$hierarchy_keys(),config$table$isotopeLabel) ,
newcolname,
value = config$table$get_response() )
value = value )
rowdata <- res |> dplyr::select(all_of(c(config$table$hierarchy_keys(),config$table$isotopeLabel)))
if (as.matrix) {
resMat <- as.matrix(dplyr::select(res,-dplyr::one_of(c(config$table$hierarchy_keys(),config$table$isotopeLabel))))
Expand All @@ -268,14 +286,14 @@ tidy_to_wide_config <- function(data, config, as.matrix = FALSE, fileName = FALS
#'
#' @keywords internal
#' @examples
#' dd <- prolfqua_data('data_spectronautDIA250_A')
#' conf <- dd$config_f()
#' analysis <- dd$analysis(dd$data,conf)
#' res <- tidy_to_wide_config(analysis, conf, as.matrix = TRUE)
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' data <- dd$data
#' conf <- dd$config
#' res <- tidy_to_wide_config(data, conf, as.matrix = TRUE)
#'
#' res <- scale(res$data)
#' xx <- response_matrix_as_tibble(res,"srm_intensityScaled", conf)
#' xx <- response_matrix_as_tibble(res,"srm_intensityScaled", conf,analysis)
#' xx <- response_matrix_as_tibble(res,"srm_intensityScaled", conf, data)
#' conf$table$get_response() == "srm_intensityScaled"
#'
response_matrix_as_tibble <- function(pdata, value, config, data = NULL, sep = "~lfq~"){
Expand Down Expand Up @@ -311,10 +329,8 @@ response_matrix_as_tibble <- function(pdata, value, config, data = NULL, sep = "
#' @examples
#'
#'
#' bb <- prolfqua_data('data_ionstar')$filtered()
#' bb$config <- old2new(bb$config)
#' stopifnot(nrow(bb$data) == 25780)
#' conf <- bb$config$clone(deep=TRUE)
#' bb <- prolfqua::sim_lfq_data_peptide_config()
#' conf <- bb$config
#' sample_analysis <- bb$data
#' pepIntensityNormalized <- transform_work_intensity(sample_analysis, conf, log2)
#' s1 <- get_robscales(pepIntensityNormalized, conf)
Expand Down
38 changes: 31 additions & 7 deletions R/tidyMS_aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -580,10 +580,8 @@ old2new <- function(config) {
#' @examples
#'
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#'
#' config <- dd$config
#' data <- dd$data
#'
#' data <- prolfqua::transform_work_intensity(data, config, log2)
#' bbMed <- estimate_intensity(data, config, .func = medpolish_estimate_dfconfig)
#' bbRob <- estimate_intensity(data, config, .func = rlm_estimate_dfconfig)
Expand All @@ -601,8 +599,6 @@ estimate_intensity <- function(data, config, .func)
config <- config$clone(deep = TRUE)

xnested <- data |> group_by_at(config$table$hierarchy_keys_depth()) |> nest()
nr_children <- data |> group_by_at(config$table$hierarchy_keys_depth()) |>
summarize(!!config$table$nr_children := n())
pb <- progress::progress_bar$new(total = nrow(xnested))
message("starting aggregation")

Expand All @@ -622,10 +618,31 @@ estimate_intensity <- function(data, config, .func)
dplyr::select_at(c(config$table$hierarchy_keys_depth(), makeName)) |>
tidyr::unnest(cols = makeName) |>
dplyr::ungroup()
unnested <- dplyr::inner_join(nr_children, unnested)

new_child = paste0("nr_",config$table$hierarchy_keys_depth())
res_nr_children <- nr_obs(data, config, new_child = new_child)
unnested <- inner_join(unnested, res_nr_children, by = c(config$table$hierarchy_keys_depth(), config$table$fileName))
newconfig$table$nr_children = new_child
return(list(data = unnested, config = newconfig))
}

#' Sum the number of observations (children) per hierarchy key and file
#'
#' Groups \code{data} by the columns returned by
#' \code{config$table$hierarchy_keys_depth()} together with
#' \code{config$table$fileName}, and sums the column named by
#' \code{config$table$nr_children} within each group (\code{NA}s are ignored).
#'
#' @param data data.frame containing the grouping columns and the column
#'   named by \code{config$table$nr_children}
#' @param config configuration object; \code{config$table} must provide
#'   \code{hierarchy_keys_depth()}, \code{fileName} and \code{nr_children}
#' @param new_child name of the column that receives the summed counts,
#'   defaults to \code{config$table$nr_children}
#' @return ungrouped data.frame with one row per combination of the grouping
#'   columns and the summed counts in column \code{new_child}
#' @export
#' @examples
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' dd$data <- na.omit(dd$data)
#' xd <- nr_obs(dd$data, dd$config)
#'
#' #xd |> head()
#'
#' xd$nr_children |> table()
nr_obs <- function(data, config, new_child = config$table$nr_children) {
  group_cols <- c(config$table$hierarchy_keys_depth(), config$table$fileName)
  data |>
    dplyr::group_by(!!!rlang::syms(group_cols)) |>
    dplyr::summarize(
      !!new_child := sum(!!rlang::sym(config$table$nr_children), na.rm = TRUE),
      # drop the grouping so callers get a plain table and no
      # "summarise() has grouped output" message is emitted
      .groups = "drop"
    )
}

#' Plot feature data and result of aggregation
#'
#' @param data data.frame before aggregation
Expand Down Expand Up @@ -696,8 +713,8 @@ plot_estimate <- function(data, config, data_aggr, config_reduced, show.legend=
#' @keywords internal
#' @examples
#'
#' dd <- prolfqua_data('data_ionstar')$filtered()
#' config <- old2new(dd$config)
#' dd <- prolfqua::sim_lfq_data_peptide_config()
#' config <- dd$config
#' res <- dd$data
#' ranked <- rank_peptide_by_intensity(res,config)
#'
Expand Down Expand Up @@ -748,6 +765,13 @@ aggregate_intensity_topN <- function(pdata , config, .func, N = 3){
config,
workIntensity = newcol,
hierarchy = config$table$hierarchy[seq_len(config$table$hierarchyDepth)])

new_child_name <- paste0("nr_", config$table$hierarchy_keys_depth() )
res_nr_children <- nr_obs(pdata, config, new_child = new_child_name)
sumTopInt <- inner_join(
sumTopInt, res_nr_children,
by = c(config$table$fileName, config$table$hierarchy_keys_depth()))
newconfig$table$nr_children = new_child_name
return(list(data = sumTopInt, config = newconfig))
}

Expand Down
1 change: 1 addition & 0 deletions man/LFQDataAggregator.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/aggregate_intensity_topN.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions man/estimate_intensity.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions man/get_robscales.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/nr_obs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/response_matrix_as_tibble.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7a7dfb3

Please sign in to comment.