-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
261 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
--- | ||
title: "Modelling with Interactions." | ||
author: "Witold Wolski" | ||
date: "`r format(Sys.time(), '%d %B, %Y')`" | ||
output: | ||
html_document: default | ||
pdf_document: default | ||
vignette: | | ||
%\VignetteIndexEntry{Modelling with Interactions.} | ||
%\VignetteEncoding{UTF-8} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
editor_options: | ||
chunk_output_type: console | ||
--- | ||
|
||
|
||
```{r setup, include=FALSE} | ||
# Chunk defaults for the whole document: echo the code, hide messages and warnings. | ||
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) | ||
``` | ||
|
||
|
||
|
||
|
||
|
||
|
||
```{r} | ||
# Toy example of a purely additive two-factor design: each cell value is the sum | ||
# of one level of f1 and one level of f2. | ||
# For reference, samples a-e presumably correspond to the values 3, 4.5, 6, 7.5, 9 | ||
# (TODO confirm); only the four cells a-d are constructed here. | ||
f1 <- list(l1 = 3, l2 = 4.5) | ||
f2 <- list(l1 = 0, l2 = 3) | ||
a <- f1$l1 + f2$l1 | ||
b <- f1$l2 + f2$l1 | ||
# Note: this variable shares its name with base::c; the call c(...) below still | ||
# resolves to the function because R skips non-function bindings when calling. | ||
c <- f1$l1 + f2$l2 | ||
d <- f1$l2 + f2$l2 | ||
# Print the four cell values. | ||
c(a, b, c, d) | ||
``` | ||
|
||
```{r LoadDataAndConfigure} | ||
# Locate the MaxQuant example files inside the installed prolfqua package. | ||
datadir <- find.package("prolfqua") |> | ||
  file.path("samples/maxquant_txt") | ||
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx") | ||
inputMQfile <- file.path(datadir, "tiny2.zip") | ||
# Read the zipped MaxQuant output with tidyMQ_ProteinGroups. | ||
startdata <- inputMQfile |> prolfqua::tidyMQ_ProteinGroups() | ||
``` | ||
|
||
Read the sample annotation. The sample annotation must contain the `raw.file` name and the explanatory variables of your experiment, e.g. treatment, timepoint, genetic background, or other factors which you would like to check for confounding. | ||
|
||
|
||
```{r readAnnotation} | ||
# Read the annotation sheet and show its first rows. | ||
annotation <- readxl::read_xlsx(inputAnnotation) | ||
head(annotation) | ||
# Drop sample "e", then derive the two factors from the sample name: | ||
# samples a and c get f1 = "l1", samples a and b get f2 = "l1", all others "l2". | ||
annotation <- annotation |> | ||
  dplyr::filter(sample != "e") |> | ||
  dplyr::mutate( | ||
    f1 = dplyr::if_else(sample %in% c("a", "c"), "l1", "l2"), | ||
    f2 = dplyr::if_else(sample %in% c("a", "b"), "l1", "l2") | ||
  ) |> | ||
  dplyr::arrange(sample) | ||
``` | ||
|
||
Merge the annotation with the quantitative data using `inner_join`, joining by `raw.file`. | ||
|
||
```{r addAnnotationToData} | ||
# Keep only rows whose raw.file occurs in both the annotation and the quantitative data. | ||
startdata <- annotation |> | ||
  dplyr::inner_join(startdata, by = "raw.file") | ||
``` | ||
|
||
We remove all proteins identified only by a single peptide. | ||
|
||
|
||
```{r filterForAtLeastTwoPeptides} | ||
# Keep only rows with more than one peptide. | ||
startdata <- startdata |> | ||
  dplyr::filter(nr.peptides > 1) | ||
``` | ||
|
||
|
||
Then you need to _tell_ `prolfqua` which columns in the data frame contain what information. You do it using the `AnalysisTableAnnotation` class. | ||
|
||
```{r setupConfigs} | ||
# Create the AnalysisTableAnnotation object; its fields are set in the following chunks. | ||
atable <- prolfqua::AnalysisTableAnnotation$new() | ||
``` | ||
|
||
The `AnalysisTableAnnotation` has the following fields that need to be populated: | ||
|
||
- fileName | ||
- hierarchy | ||
- factors | ||
- workingIntensity | ||
|
||
We will discuss each of them in more detail next. | ||
|
||
|
||
The `fileName` is the column with the raw file names; however, for labelled TMT experiments it can be used to hold the name of the TMT channel. | ||
|
||
```{r specifyRawFile} | ||
# The raw file names are stored in the `raw.file` column. | ||
atable$fileName <- "raw.file" | ||
``` | ||
|
||
The `hierarchy` field describes the structure of the MS data, e.g., | ||
|
||
- protein | ||
- peptides | ||
- modified peptides | ||
- precursor | ||
|
||
In case of the MQ proteinGroups file we have data on protein level. | ||
|
||
```{r specifyProteinID} | ||
# Single hierarchy level named `protein_Id`, taken from the `proteinID` column. | ||
atable$hierarchy[["protein_Id"]] <- "proteinID" | ||
``` | ||
|
||
In addition, you need to describe the `factors` of the analysis, i.e., the columns containing the explanatory variables. | ||
|
||
```{r specifyFactors} | ||
# Register the two explanatory variables; the factor names `f1.` and `f2.` are the | ||
# ones used in the model formula and in the contrast definitions later on. | ||
atable$factors[["f1."]] <- "f1" | ||
atable$factors[["f2."]] <- "f2" | ||
``` | ||
|
||
We also need to specify the column containing the protein abundances. | ||
|
||
```{r specifyIntensity} | ||
# Use the `mq.protein.intensity` column as the working intensity, i.e. the protein abundances. | ||
atable$setWorkIntensity("mq.protein.intensity") | ||
``` | ||
|
||
Finally we create the `AnalysisConfiguration` which needs the `AnalysisTableAnnotation` we just created and the `AnalysisParameters`. | ||
|
||
```{r createAnalysisConfig} | ||
# Build the configuration from the table annotation, then pass the data and the | ||
# configuration to setup_analysis. | ||
config <- prolfqua::AnalysisConfiguration$new(atable) | ||
adata <- startdata |> | ||
  prolfqua::setup_analysis(config) | ||
``` | ||
|
||
Create the `LFQData` class instance and remove zeros from data (MaxQuant encodes missing values with zero). | ||
|
||
```{r removeSmallIntensities} | ||
# Wrap the prepared data and its configuration in an LFQData object. | ||
lfqdata <- prolfqua::LFQData$new(adata, config) | ||
# Called for its effect on `lfqdata` (the result is not assigned); per the text above | ||
# this removes the zeros MaxQuant uses for missing values. | ||
lfqdata$remove_small_intensities() | ||
# Print the result of factors() for inspection. | ||
lfqdata$factors() | ||
``` | ||
|
||
|
||
```{r} | ||
# Create the heatmap here; it is displayed in the next chunk. | ||
hm <- lfqdata$get_Plotter()$heatmap() | ||
``` | ||
|
||
```{r plotHeatmap} | ||
# Display the heatmap created in the previous chunk. | ||
hm | ||
``` | ||
|
||
|
||
```{r} | ||
# Log2-transform the data and keep the transformed object stored in `$lfq`. | ||
tr <- lfqdata$get_Transformer() | ||
lfqTrans <- tr$log2()$lfq | ||
# Plot the intensity distribution (density) of the transformed data. | ||
lfqTrans$get_Plotter()$intensity_distribution_density() | ||
# Presumably prints the name of the current response column (TODO confirm). | ||
lfqTrans$response() | ||
# Rename the response to "abundance", the name used on the left-hand side of the model formula. | ||
lfqTrans$rename_response("abundance") | ||
``` | ||
|
||
|
||
# Model Fitting | ||
|
||
|
||
```{r specifyModel} | ||
# Bookkeeping values for this analysis. | ||
modelName <- "Model" | ||
DEBUG <- TRUE | ||
# Linear model with both main effects and their interaction. | ||
formula_Batches <- prolfqua::strategy_lm("abundance ~ f1. * f2. ") | ||
# Contrasts as name = expression pairs: the two main-effect differences, the f1 | ||
# difference within f2.l1 and within f2.l2, and the difference between those two | ||
# (referenced by their contrast names). | ||
Contrasts <- c( | ||
  "f1.l1vsf1.l2" = "f1.l1 - f1.l2", | ||
  "f2.l1vsf2.l2" = "f2.l1 - f2.l2", | ||
  "f1l1vsf1l2_gv_f2.l1" = "`f1.l1:f2.l1` - `f1.l2:f2.l1`", | ||
  "f1l1vsf1l2_gv_f2.l2" = "`f1.l1:f2.l2` - `f1.l2:f2.l2`", | ||
  "Interaction" = "`f1l1vsf1l2_gv_f2.l1` - `f1l1vsf1l2_gv_f2.l2`" | ||
) | ||
``` | ||
|
||
|
||
```{r buildModel} | ||
# Build the model from the transformed data and the model strategy defined above. | ||
mod <- prolfqua::build_model(lfqTrans, formula_Batches) | ||
``` | ||
|
||
|
||
|
||
```{r anovaPvaluePlots, fig.cap="p-value distributions for ANOVA analysis."} | ||
# Histogram of the ANOVA results, using the `FDR.Pr..F.` column. | ||
mod$anova_histogram(what = "FDR.Pr..F.") | ||
``` | ||
|
||
## ANOVA | ||
|
||
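Examine the ANOVA results: list the five proteins with the smallest FDR for the interaction between the two factors `f1` and `f2`, then select the proteins with a significant main effect of `f1` (FDR < 0.25) for plotting. | ||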
|
||
```{r anovaAnalysis} | ||
ANOVA <- mod$get_anova() | ||
# Five rows with the smallest FDR for the f1:f2 interaction term. | ||
ANOVA |> | ||
  dplyr::filter(factor == "f1.:f2.") |> | ||
  dplyr::arrange(FDR.Pr..F.) |> | ||
  head(5) | ||
# Terms present in the ANOVA table. | ||
unique(ANOVA$factor) | ||
# Rows for the f1 main effect with FDR below 0.25. | ||
protIntSig <- dplyr::filter(ANOVA, factor == "f1.", FDR.Pr..F. < 0.25) | ||
# Work on a copy so that lfqTrans itself keeps all proteins. | ||
protInt <- lfqTrans$get_copy() | ||
protInt$data <- protInt$data[protInt$data$protein_Id %in% protIntSig$protein_Id, ] | ||
``` | ||
|
||
|
||
```{r fig.width=15, fig.height=15, fig.cap="Proteins with FDR < 0.25 for factor f1 in ANOVA."} | ||
# Arrange the boxplots of the selected proteins (protInt) in a single figure. | ||
ggpubr::ggarrange(plotlist = protInt$get_Plotter()$boxplots()$boxplot) | ||
``` | ||
|
||
# Compute contrasts | ||
|
||
```{r computeModeratedContrasts} | ||
# Compute the contrasts from the fitted model and wrap them in ContrastsModerated. | ||
contr <- prolfqua::ContrastsModerated$new( | ||
  prolfqua::Contrasts$new(mod, Contrasts) | ||
) | ||
#contr$get_contrasts_sides() | ||
# Table of contrast results. | ||
contrdf <- contr$get_contrasts() | ||
``` | ||
|
||
```{r} | ||
# Volcano and MA plots for the contrasts in `contr`. | ||
plotter <- contr$get_Plotter() | ||
plotter$volcano() | ||
plotter$ma_plot() | ||
``` | ||
|
||
|
||
|
||
## Analyse contrasts with missing data imputation | ||
|
||
```{r} | ||
# NOTE(review): this changes factorDepth on the config attached to lfqTrans before the | ||
# imputation-based contrasts are computed; presumably so both factors are used (TODO confirm). | ||
lfqTrans$config$table$factorDepth <- 2 | ||
#ContrastsSimpleImpute$debug("get_contrasts") | ||
# Same contrast definitions as above, now evaluated with ContrastsSimpleImpute on the data. | ||
contrSimple <- prolfqua::ContrastsSimpleImpute$new(lfqdata = lfqTrans, Contrasts) | ||
contrdfSimple <- contrSimple$get_contrasts() | ||
#na.omit(contrdfSimple) | ||
# Histogram of the differences and volcano plot for these contrasts. | ||
pl <- contrSimple$get_Plotter() | ||
pl$histogram_diff() | ||
pl$volcano() | ||
``` | ||
|
||
|
||
## Merge nonimputed and imputed data. | ||
|
||
```{r} | ||
# Compare the dimensions of the two contrast tables before merging. | ||
contr$get_contrasts() |> dim() | ||
contrSimple$get_contrasts() |> dim() | ||
# Combine both results, with `contr` passed as `prefer` and `contrSimple` as `add`; | ||
# keep the `merged` element of the result. | ||
mergedContrasts <- prolfqua::addContrastResults( | ||
  prefer = contr, | ||
  add = contrSimple | ||
)$merged | ||
cM <- mergedContrasts$get_Plotter() | ||
# Note: this variable is named `plot`, like the base graphics function. | ||
plot <- cM$volcano() | ||
# Show the `FDR` element of the volcano result. | ||
plot$FDR | ||
``` | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters