-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.R
58 lines (38 loc) · 1.63 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library(hgu133plus2.db)
library(pheatmap)
library(ggplot2)
library(DESeq2)
library(readxl)
library(writexl)
library(tibble)
library(scales)
library(viridis)
library(tidyr)
source("utils.R")
# Configuration ----

# Path of the xlsx file containing the raw, non-normalized sequence read
# counts at either the gene or the transcript level
rawCountsFile <- "RawCounts2.xlsx"
# Path of the xlsx file containing the properties of the samples
sampleDataFile <- "SampleData2.xlsx"
# Type of RNA identifier used in rawCountsFile
ID_TYPE <- "ENSEMBL"
# Sample property (column of the sample data) to compare
Compare <- "Disease"
# Threshold used to discard the genes/transcripts that have a small read count
threshold <- 30
# Path of the folder where the results and plots will be saved
saveFolder <- "Results"

# Output folder ----

# Only create the folder when it is missing, so re-running the script does not
# emit an "already exists" warning; recursive = TRUE also allows nested paths.
if (!dir.exists(saveFolder)) {
  dir.create(saveFolder, recursive = TRUE)
}

# Input data ----

# All helpers called below are expected to come from utils.R, sourced above.
mapping <- build_mapping(ID_TYPE)
rawData <- read_raw_data(rawCountsFile, sampleDataFile)

# Quality control on the raw counts ----

# The +1 pseudo-count keeps zero counts finite on the log2 scale.
logRawCounts <- log2(rawData$rawCounts + 1)
PCA(logRawCounts, rawData$sampleData, Compare, saveFolder)
box_intensities(logRawCounts, saveFolder)

# DESeq2 data set and normalized counts ----

deseq2Data <- make_DEseq2DataSet(
  rawData$rawCounts, rawData$sampleData, Compare, threshold, saveFolder
)
box_deviation(deseq2Data, saveFolder)

# Same +1 pseudo-count as for the raw counts: without it, every zero count
# becomes -Inf after log2() and propagates into the PCA and the heatmap.
normalized_counts <- log2(counts(deseq2Data, normalized = TRUE) + 1)
normalized_PCA(normalized_counts, rawData$sampleData, Compare, saveFolder)
heatmap(normalized_counts, rawData$sampleData, Compare, saveFolder)

# Differential analysis and result plots ----

# NOTE(review): `deseq2Results` is never assigned in this script. Presumably it
# is created as a global by utils.R, or diff_Analysis() never evaluates its
# first argument -- confirm, otherwise this call fails with "object not found".
deseq2ResDF <- diff_Analysis(
  deseq2Results, deseq2Data, rawData$sampleData, Compare, mapping, saveFolder
)
fold_vs_count(deseq2ResDF, saveFolder)
count_vs_fold(deseq2ResDF, saveFolder)
volcano(deseq2ResDF, saveFolder)