-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca_diffBind.R
73 lines (63 loc) · 1.94 KB
/
pca_diffBind.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
### FUNCTIONS ###
# Run the DiffBind steps for one sample sheet and write a per-sample stats
# table plus a PCA plot.
#
# Arguments:
#   sample_sheet  sample sheet handed to dba(sampleSheet = ...).
#   out_name      label used in the output file names
#                 ("stats_<out_name>.csv" and "pca_<out_name>.pdf").
#   n_cores       value stored in the DBA object's config$cores before counting.
#   stats_out     if TRUE the stats table is returned; otherwise the function
#                 returns NULL invisibly.
#
# Side effects: prints the stats table and writes the CSV and the PDF into the
# current working directory.
FX1 <- function(sample_sheet, out_name, n_cores = 1, stats_out = FALSE) {
  ## Generate a DBA object
  dba_data <- dba(
    sampleSheet = sample_sheet,
    minOverlap = 2,
    filter = 0,
    bRemoveM = TRUE
  )
  dba_data$config$cores <- n_cores

  dba_data <- dba.count(
    dba_data,
    minOverlap = 2,
    bUseSummarizeOverlaps = FALSE,
    bParallel = TRUE,
    filter = 1,
    summits = 200
  )
  dba_data <- dba.normalize(
    dba_data,
    normalize = DBA_NORM_LIB,
    bFullLibrarySize = TRUE
  )

  # Keep only the summary columns of interest; drop = FALSE keeps a data frame
  # even if just one of them is present in dba.show()'s output.
  stats <- dba.show(dba_data)
  keep_cols <- c("ID", "Tissue", "Factor", "Intervals", "Reads", "FRiP")
  stats <- stats[, names(stats) %in% keep_cols, drop = FALSE]
  print(stats)
  write.csv(
    stats,
    paste0("stats_", out_name, ".csv"),
    row.names = FALSE,
    quote = FALSE
  )

  # Register the device close immediately so the PDF device is released even
  # when the plotting call fails.
  pdf(paste0("pca_", out_name, ".pdf"))
  on.exit(dev.off(), add = TRUE)
  dba.plotPCA(dba_data, attributes = DBA_ID, label = "ID", labelSize = 0.5)

  if (stats_out) stats else invisible(NULL)
}
### RUN ###
library(DiffBind)

# Sample-sheet columns used to split the samples into groups.
group_by1 <- "Factor"
group_by2 <- "Tissue"
group_by3 <- "Treatment"

# Input sample sheet and the core count passed on to FX1().
ss_file <- "samples.diffBind.csv"
n_cores <- 8

# A large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 999)

# Read the sample sheet and replace every missing value with 0.
ss <- read.csv(ss_file)
ss[is.na(ss)] <- 0

# Distinct levels of each grouping column.
groups1 <- unique(ss[, group_by1])
groups2 <- unique(ss[, group_by2])
groups3 <- unique(ss[, group_by3])
## add replicate ID
# for (group1 in groups1){
# for (group2 in groups2){
# for (group3 in groups3){
# idx = ss[,group_by1]==group1 & ss[,group_by2]==group2 & ss[,group_by3]==group3
# ss$Replicate[idx]=1:sum(idx)
# }
# }
# }
# make PCAs
# One FX1() run per level of group_by1, then one per (group_by1, group_by2)
# combination. Only subsets with more than two samples are analysed.
# Stats are collected only for the group_by1-level runs that actually
# happened, so a skipped group neither aborts the script nor re-appends the
# previous group's table.
stats_by_group <- list()
for (group1 in groups1) {
  print(group1)
  ss_i <- ss[ss[, group_by1] == group1, ]
  if (nrow(ss_i) > 2) {
    stats_by_group[[length(stats_by_group) + 1L]] <- FX1(
      sample_sheet = ss_i,
      out_name = group1,
      n_cores = n_cores,
      stats_out = TRUE
    )
  }

  for (group2 in groups2) {
    print(group2)
    ss_i <- ss[ss[, group_by1] == group1 & ss[, group_by2] == group2, ]
    if (nrow(ss_i) > 2) {
      FX1(
        sample_sheet = ss_i,
        out_name = paste0(group1, "_", group2),
        n_cores = n_cores
      )
    }
  }
}

# Combine the per-group tables once, after the loop. X is NULL when no group
# had enough samples, in which case there is nothing to write.
X <- do.call(rbind, stats_by_group)
if (is.null(X)) {
  warning(
    "No ", group_by1, " group had more than 2 samples; ",
    "stats_diffBind.csv was not written.",
    call. = FALSE
  )
} else {
  write.csv(X, "stats_diffBind.csv", row.names = FALSE, quote = FALSE)
}