forked from bobthecat/R-Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscripts_lecture_2.r
169 lines (141 loc) · 5.46 KB
/
scripts_lecture_2.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#
# Lecture_2.r
#
# Created by David Ruau on 2011-04-26.
# Copyright (c) 2011 Dept. of Pediatrics and Anesthesia. All rights reserved.
## ACCESS R-CLOUD
# OPEN THE TUTORIAL PROJECT.
#####################################################
## PREREQUISITE PACKAGE TO INSTALL
#####################################################
# biocLite() and the biocLite.R bootstrap script were retired by Bioconductor;
# BiocManager is the supported installer. Bootstrap it from CRAN when it is
# not available yet, then install GEOquery through it.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("GEOquery")
## load the required libraries
library(GEOquery)
library(affy)
#####################################################
## OBTAIN RAW DATA FROM GEO
#####################################################
# GSE12499 raw data set with 3 cell types.
# getGEOSuppFiles() downloads the supplementary files into ./GSE12499/
getGEOSuppFiles("GSE12499")
# Fail early with a clear message if the download did not produce the archive,
# rather than letting a later step fail on missing CEL files.
if (!file.exists(file.path("GSE12499", "GSE12499_RAW.tar"))) {
  stop("GSE12499/GSE12499_RAW.tar was not downloaded", call. = FALSE)
}
# Unpack the archive and clean up to keep only the essential files.
# untar()/unlink() are used instead of shelling out to tar/rm so this step
# also works where those command-line tools are not available.
untar(file.path("GSE12499", "GSE12499_RAW.tar"), exdir = "GSE12499")
unlink(Sys.glob(file.path("GSE12499", "*.CHP*")))
unlink(Sys.glob(file.path("GSE12499", "*.tar")))
#####################################################
## IMPORT THE AFFYMETRIX DATA IN R
#####################################################
# Read the (compressed) CEL files found under ./GSE12499/ into `da`
da <- ReadAffy(compress = TRUE, celfile.path = "./GSE12499/")
# ReadAffy() stores the raw Affymetrix data in an AffyBatch object;
# print the class to confirm
class(da)
#####################################################
## ADDING SOME PHENODATA
#####################################################
# Show the phenoData that `da` carries by default
pData(da)
# ... and the sample names currently in use
sampleNames(da)
# Replace the default phenoData with the annotation stored in a text file.
# First variant: download the tab-separated treatment file and read it
URL <- "http://www.stanford.edu/~druau/treatment.txt"
download.file(URL, destfile = "./treatment.txt")
pd <- read.table(file = "treatment.txt", header = TRUE, sep = "\t")
pd
# Second variant: the comma-separated file; this `pd` overwrites the first
# one and is the table used below
URL <- "http://www.stanford.edu/~druau/treatment.csv"
download.file(URL, destfile = "./treatment.csv")
pd <- read.table(file = "treatment.csv", header = TRUE, sep = ",")
pd
## Install the table as the phenoData of the data set and display the result
pData(da) <- pd
pData(da)
# Rename the samples with the meaningful names held in the first column of pd
sampleNames(da) <- pd[[1]]
#####################################################
## QUALITY ASSESSMENT
#####################################################
# Attach affyPLM, which supplies fitPLM() used on the next line
library(affyPLM)
# Compute a PLM set from the AffyBatch object by fitting a robust linear model
# to the probe-level data. The result (`pset`) feeds the quality plots below.
pset <- fitPLM(da)
# Plot four images of one array on a 2x2 grid for visual detection of
# spatial artifacts.
#
# Arguments:
#   batch  object indexed as batch[, x] and handed to image()
#          (the AffyBatch `da` in this script)
#   pset   fitted PLM object handed to image() (the output of fitPLM() here)
#   x      index of the array to display
#
# Panels, in drawing order: image(batch[, x]), then the PLM "weights",
# "resids" and "sign.resids" images for array x.
#
# Returns NULL invisibly; the function is called for its plots.
img.Test <- function(batch,pset,x) {
  # Remember the caller's layout and restore it on exit (also when one of the
  # image() calls fails) instead of unconditionally forcing a 1x1 layout.
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)
  image(batch[, x])
  image(pset, type = "weights", which = x)
  image(pset, type = "resids", which = x)
  image(pset, type = "sign.resids", which = x)
  invisible(NULL)
}
# Run the image diagnostics for the first microarray
img.Test(da, pset, 1)
# RLE (Relative Log Expression)
# brewer.pal() lives in RColorBrewer, which none of the packages attached
# earlier in this script brings onto the search path, so attach it explicitly.
library(RColorBrewer)
cols <- brewer.pal(12, "Set3")
# NOTE: the xlab text deliberately keeps the original "\n" followed by a
# literal line break, so the label is unchanged from the original script.
Mbox(pset, col = cols, main = "RLE (Relative Log Expression)",
     xlab = "Assuming that the majority of the gene are not changing\n
Ideally these boxes would have small spread and be centered at M=0")
# NUSE (Normalized Unscaled Standard Error)
boxplot(pset, col = cols, main = "NUSE (Normalized Unscaled Standard Error)",
        xlab = "High values of median NUSE are indicative of a problematic array")
# RNA degradation plot, one line per sample, with a legend keyed by sample name
RNAdeg <- AffyRNAdeg(da)
plotAffyRNAdeg(RNAdeg, cols = cols)
legend("topleft", sampleNames(da), lty = 1, col = cols)
box()
#####################################################
## BACKGROUND CORRECT + NORMALIZE YOUR DATA USING RMA
#####################################################
# Attach gcrma.
# NOTE(review): the rma() call below looks like the affy implementation, which
# is already attached; gcrma may not be needed here -- confirm before removing.
library(gcrma)
# Background-correct and normalize the raw AffyBatch with RMA; the result is
# kept in `da.n`
da.n <- rma(da)
# Dimensions of the normalized object
dim(da.n)
#####################################################
## EXTRACT THE NORMALIZED EXPRESSION VALUES INTO a MATRIX
#####################################################
# Pull the normalized expression values out of `da.n` as a matrix.
# Despite its name, `da.eset` holds the matrix itself.
da.eset <- exprs(da.n)
# Look at what is inside this normalized expression matrix: its dimensions ...
dim(da.eset)
# ... and its column (sample) names. The Rank Product step further down
# selects columns by position (7:10 and 4:6), so it relies on the order below.
colnames(da.eset)
# [1] "NSC_1F_iPS_1" "NSC_1F_iPS_2" "NSC_1F_iPS_3" "1F_iPS_NSC_1" "1F_iPS_NSC_2"
# [6] "1F_iPS_NSC_3" "NSC_1" "NSC_2" "NSC_3" "NSC_4"
#####################################################
## EXTRACT THE GENE SIGNIFICANTLY REGULATED USING RANK PRODUCT
#####################################################
# Load the Rank Product library
library(RankProd)
# Load the annotation package for the microarray
library(mouse4302.db)
# Gene symbols, one per row of da.eset. The symbols are looked up by the row
# names of the expression matrix so that gnames[i] always labels row i, rather
# than relying on the annotation map listing its probes in the same order
# (and number) as the matrix. Probes without a symbol get NA.
gnames <- vapply(
  mget(rownames(da.eset), mouse4302SYMBOL, ifnotfound = NA),
  function(sym) as.character(sym[1]),
  character(1),
  USE.NAMES = FALSE
)
# Build the class vector for the comparison: NSC versus 1F iPS cells.
# It must follow the column order used in the RP() call below:
# columns 7:10 (NSC_*) are class 0, columns 4:6 (1F_iPS_NSC_*) are class 1.
cl <- c(rep(0, 4), rep(1, 3))
da.rp <- RP(da.eset[, c(7:10, 4:6)], cl = cl, logged = TRUE, num.perm = 100,
            plot = FALSE, rand = 5432)
# Extract the genes passing the 0.05 cutoff on pfp (the corrected significance
# measure selected with method = "pfp")
r.nsc.1fipsc <- topGene(da.rp, cutoff = 0.05, method = "pfp", logged = TRUE,
                        logbase = 2, gene.names = gnames)
#####################################################
## THE LISTS
#####################################################
# Significantly up-regulated genes: first 20 rows of table 1
head(r.nsc.1fipsc[["Table1"]], n = 20)
# Size of table 1 (number of genes x number of columns)
dim(r.nsc.1fipsc[["Table1"]])
# Significantly down-regulated genes: first 20 rows of table 2
head(r.nsc.1fipsc[["Table2"]], n = 20)
# Size of table 2 (number of genes x number of columns)
dim(r.nsc.1fipsc[["Table2"]])
#####################################################
## EXPORT THE LIST TO A TABULATED FILE
#####################################################
# Export both result tables as tab-separated text files.
# A table is NULL when no gene passed the cutoff in that direction; in that
# case the file is skipped with a message instead of failing on x[, 3].
# NOTE(review): only the values of column 3 are transformed; the column header
# written to the file is left as produced by topGene() -- confirm that readers
# of these files expect log2(1 / fold change) under that header.

# up reg
x <- r.nsc.1fipsc$Table1
if (is.null(x)) {
  message("No up-regulated genes to export; NSC_vs_1F_iPSC_up.txt not written")
} else {
  # replace the fold change values (column 3) by log2(1 / fold change)
  x[, 3] <- log2(1 / x[, 3])
  write.table(x, file = "NSC_vs_1F_iPSC_up.txt", sep = "\t", quote = FALSE,
              row.names = TRUE)
}
# down reg
x <- r.nsc.1fipsc$Table2
if (is.null(x)) {
  message("No down-regulated genes to export; NSC_vs_1F_iPSC_down.txt not written")
} else {
  # replace the fold change values (column 3) by log2(1 / fold change)
  x[, 3] <- log2(1 / x[, 3])
  write.table(x, file = "NSC_vs_1F_iPSC_down.txt", sep = "\t", quote = FALSE,
              row.names = TRUE)
}
#####################################################
## SAVE YOUR WORKSPACE
#####################################################
# Write every object of the current workspace to ".RData" in the working
# directory (the file save.image() uses when none is given)
save.image(file = ".RData")