Skip to content

Commit

Permalink
simulate LFQ data peptide and protein
Browse files Browse the repository at this point in the history
  • Loading branch information
wolski committed Jan 18, 2024
1 parent 99dfce9 commit 9f9a807
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 95 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ export(separate_factors)
export(separate_hierarchy)
export(setup_analysis)
export(sim_lfq_data)
export(sim_lfq_data_config)
export(sim_lfq_data_peptide_config)
export(spread_response_by_IsotopeLabel)
export(squeezeVarRob)
export(strategy_glm)
Expand Down
86 changes: 58 additions & 28 deletions R/simulate_LFQ_data.R
Original file line number Diff line number Diff line change
@@ -1,27 +1,22 @@
#' simulate peptide level data with two groups
#' simulate protein level data with two groups
#' @export
#' @param Nprot number of proteins
#' @param N group size
#' @param fc fold changes per group: D - down regulation, N - not regulated (matrix), U - up regulation
#' @param prop proportion of down (D), up (U) and not regulated (N)
#' @examples
#' res <- sim_lfq_data()
#' sim_lfq_data(Nprot = 10)
#'
#' dd <- sim_lfq_data()
#' dd$abundance <- add_missing(dd$abundance)
#'
#' sim_lfq_data_config()
#' sim_lfq_data_config(with_missing = FALSE)

#' sim_lfq_data(Nprot = 10)
#' res <- sim_lfq_data(Nprot = 10, PEPTIDE = TRUE)
sim_lfq_data <- function(
Nprot = 20,
N = 4,
fc = list(A = c(D = -2, U = 2, N = 0), B = c(D = 1, U = -4)),
prop = list(A = c(D = 10, U = 10), B = c(D = 5, U = 20)),
mean_prot = 20,
sd = 1.2,
probability_of_success = 0.3
sdlog = log(1.2),
probability_of_success = 0.3,
PEPTIDE = FALSE
) {

proteins <- stringi::stri_rand_strings(Nprot, 6)
Expand All @@ -32,7 +27,7 @@ sim_lfq_data <- function(
prot <- data.frame(
proteinID = proteins,
nrPeptides = nrpeptides,
average_prot_abundance = rlnorm(Nprot,log(20),sdlog = log(sd)),
average_prot_abundance = rlnorm(Nprot,log(20),sdlog = sdlog),
mean_Ctrl = 0,
N_Ctrl = N,
sd = 1
Expand All @@ -57,32 +52,46 @@ sim_lfq_data <- function(
prot <- prot |> dplyr::mutate(!!groupMean := FC, !!groupSize := N)
}

# add row for each protein
peptide_df <- prot |> tidyr::uncount( nrPeptides )
# create peptide ids
peptide_df$peptideID <- stringi::stri_rand_strings(sum(prot$nrPeptides), 8)
if (PEPTIDE) {

# add row for each protein
peptide_df <- prot |> tidyr::uncount( nrPeptides )
# create peptide ids
peptide_df$peptideID <- stringi::stri_rand_strings(sum(prot$nrPeptides), 8)
} else {
peptide_df <- prot
}


# transform into long format
peptide_df2 <- peptide_df |> tidyr::pivot_longer(cols = tidyselect::starts_with(c("mean", "N_")),
names_to = "group" , values_to = "mean")
peptide_df2 <- peptide_df2 |> tidyr::separate(group, c("what", "group"))
peptide_df2 <- peptide_df2 |> tidyr::pivot_wider(names_from = "what", values_from = mean)

peptide_df2$avg_peptide_abd <-
with(peptide_df2,
rlnorm(nrow(peptide_df2),
meanlog = log(average_prot_abundance - mean),
sdlog = log(sd)))

sample_from_normal <- function(mean, sd, n) {
rnorm(n, mean, sd)
}

nrpep <- nrow(peptide_df2)
sampled_data <- matrix(nrow = nrpep, ncol = N)
colnames(sampled_data) <- c("V1","V2","V3","V4")
for (i in seq_len(nrpep)) {
sampled_data[i,] <- sample_from_normal(peptide_df2$avg_peptide_abd[i], peptide_df2$sd[1], peptide_df2$N[i])
colnames(sampled_data) <- paste0("V", 1:ncol(sampled_data))

peptide_df2$average_prot_abundance <- peptide_df2$average_prot_abundance - peptide_df2$mean

if (PEPTIDE) {
peptide_df2$avg_peptide_abd <-
with(peptide_df2,
rlnorm(nrow(peptide_df2),
meanlog = log(average_prot_abundance),
sdlog = sdlog))
for (i in seq_len(nrpep)) {
sampled_data[i,] <- sample_from_normal(peptide_df2$avg_peptide_abd[i], peptide_df2$sd[1], peptide_df2$N[i])
}

} else {
for (i in seq_len(nrpep)) {
sampled_data[i,] <- sample_from_normal(peptide_df2$average_prot_abundance[i], peptide_df2$sd[1], peptide_df2$N[i])
}
}

x <- dplyr::bind_cols(peptide_df2,sampled_data)
Expand All @@ -97,6 +106,7 @@ sim_lfq_data <- function(
return(peptideAbundances)
}


#' add missing values to x vector based on the values of x
#' @export
#' @param x vector of intensities
Expand All @@ -122,8 +132,8 @@ add_missing <- function(x){
#' @export
#' @examples
#'
sim_lfq_data_config <- function(Nprot = 10, with_missing = TRUE){
data <- sim_lfq_data(Nprot = Nprot)
sim_lfq_data_peptide_config <- function(Nprot = 10, with_missing = TRUE){
data <- sim_lfq_data(Nprot = Nprot, PEPTIDE = TRUE)
if (with_missing) {
data$abundance <- add_missing(data$abundance)
}
Expand All @@ -141,3 +151,23 @@ sim_lfq_data_config <- function(Nprot = 10, with_missing = TRUE){
adata <- setup_analysis(data, config)
return(list(data = adata, config = config))
}

#' Simulate protein level LFQ data and wrap it into an analysis configuration
#'
#' @param Nprot number of proteins, passed on to \code{sim_lfq_data}
#' @param with_missing if TRUE the abundance column is passed through \code{add_missing}
#' @return list with elements \code{data} (output of \code{setup_analysis}) and \code{config}
#' @noRd
sim_lfq_data_protein_config <- function(Nprot = 10, with_missing = TRUE) {
  simulated <- sim_lfq_data(Nprot = Nprot, PEPTIDE = FALSE)
  if (with_missing) {
    simulated$abundance <- add_missing(simulated$abundance)
  }
  # constant columns attached to every simulated row
  simulated$isotopeLabel <- "light"
  simulated$qValue <- 0

  # map the simulated column names onto the annotation table
  annotation <- AnalysisTableAnnotation$new()
  annotation$sampleName <- "sample"
  annotation$factors["group_"] <- "group"
  annotation$hierarchy[["protein_Id"]] <- "proteinID"
  annotation$set_response("abundance")

  cfg <- AnalysisConfiguration$new(annotation)
  list(data = setup_analysis(simulated, cfg), config = cfg)
}

61 changes: 35 additions & 26 deletions R/tidyMS_plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,19 +207,25 @@ plot_hierarchies_boxplot <- function(pdata,
#' @keywords internal
#' @examples
#'
#' iostar <- prolfqua_data('data_ionstar')$filtered()
#' iostar$config <- old2new(iostar$config)
#' iostar$data <- iostar$data |>
#' #iostar <- prolfqua_data('data_ionstar')$filtered()
#' #iostar$config <- old2new(iostar$config)
#' #iostar$data <- iostar$data |>
#' # dplyr::filter(protein_Id %in% sample(protein_Id, 2))
#' #unique(iostar$data$protein_Id)
#'
#' istar <- sim_lfq_data_peptide_config()
#' config <- istar$config
#' analysis <- istar$data
#' analysis <- analysis |>
#' dplyr::filter(protein_Id %in% sample(protein_Id, 2))
#' unique(iostar$data$protein_Id)
#'
#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config)
#' res <- plot_hierarchies_boxplot_df(analysis,config)
#' res$boxplot[[1]]
#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config,iostar$config$table$hierarchy_keys()[1])
#' res <- plot_hierarchies_boxplot_df(analysis,config,config$table$hierarchy_keys()[1])
#' res$boxplot[[1]]
#' res <- plot_hierarchies_boxplot_df(iostar$data,iostar$config,
#' iostar$config$table$hierarchy_keys()[1],
#' facet_grid_on = iostar$config$table$hierarchy_keys()[2])
#' res <- plot_hierarchies_boxplot_df(analysis,config,
#' config$table$hierarchy_keys()[1],
#' facet_grid_on = config$table$hierarchy_keys()[2])
#' res$boxplot[[1]]
#'
#' bb <- prolfqua_data('data_IonstarProtein_subsetNorm')
Expand Down Expand Up @@ -266,8 +272,8 @@ plot_hierarchies_boxplot_df <- function(pdata,
#' @family plotting
#' @examples
#'
#' istar <- prolfqua_data('data_ionstar')$filtered()
#' config <- old2new(istar$config$clone(deep=TRUE))
#' istar <- sim_lfq_data_peptide_config()
#' config <- istar$config
#' analysis <- istar$data
#'
#' pheat_map <- prolfqua::plot_heatmap_cor( analysis, config )
Expand Down Expand Up @@ -322,20 +328,21 @@ plot_heatmap_cor <- function(data,
#' plot heatmap with annotations
#'
#' @export
#' @param na_fraction fraction of NA values per row
#' @param show_rownames if TRUE shows row names, default FALSE
#' @keywords internal
#' @family plotting
#' @examples
#'
#' istar <- prolfqua_data('data_ionstar')$filtered()
#' stopifnot(nrow(istar$data) == 25780)
#' config <- old2new(istar$config$clone(deep=TRUE))
#' istar <- sim_lfq_data_peptide_config()
#' config <- istar$config
#' analysis <- istar$data
#'
#' plot.new()
#'
#' p <- plot_heatmap(analysis, config)
#' stopifnot(class(p) == "pheatmap")
#' p2 <- plot_heatmap(analysis, config, show_rownames = TRUE)
#' plot.new()
#' p2
#' stopifnot(class(p) == "pheatmap")
#'
plot_heatmap <- function(data,
config,
Expand Down Expand Up @@ -394,17 +401,19 @@ plot_heatmap <- function(data,
#' @family plotting
#' @export
#' @examples
#' bb <- prolfqua_data('data_IonstarProtein_subsetNorm')
#' new <- list(config = bb$config$clone( deep = TRUE), data = bb$data)
#' istar <- LFQData$new(new$data, new$config)
#' config <- old2new(istar$config$clone(deep=TRUE))
#'
#' istar <- sim_lfq_data_peptide_config()
#' config <- istar$config
#' analysis <- istar$data
#' dev.off()
#' rs <- plot_raster(analysis, config, show_rownames=FALSE)
#' print(rs)
#' plot_raster(analysis[1,], config)
#' plot_raster(analysis, config, "var")
#' plot_raster(analysis, config, show_rownames = TRUE)
#' stopifnot(class(rs) == "pheatmap")
#' rs <- plot_raster(analysis[1,], config)
#' stopifnot(is.null(rs))
#' rs <- plot_raster(analysis, config, "var")
#' stopifnot(class(rs) == "pheatmap")
#' rs <- plot_raster(analysis, config, show_rownames = TRUE)
#' stopifnot(class(rs) == "pheatmap")
#'
plot_raster <- function(data,
config,
arrange = c("mean", "var"),
Expand Down
16 changes: 10 additions & 6 deletions man/plot_heatmap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/plot_heatmap_cor.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 15 additions & 9 deletions man/plot_hierarchies_boxplot_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 11 additions & 9 deletions man/plot_raster.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 9f9a807

Please sign in to comment.