-
Notifications
You must be signed in to change notification settings - Fork 6
/
run_burdenShift.R
125 lines (114 loc) · 5.5 KB
/
run_burdenShift.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Written by DMW, 11/7/17
# Edited by JA, 5/2/18
args <- commandArgs(trailingOnly=TRUE)
# Check if all required arguments are supplied
if (length(args)<5) {
stop("Missing 1 or more required arguments. Required: \n(1) <path to file with 'true' (observed) results>\n(2) <path to file with burden shift p-values from all annotation categories (rows) and permutations (columns)> \n(3) <path to table listing category names and category set membership ,as 0s and 1s in subsequent columns>\n(4) <number of permutations in results file (n columns - 1)>\n(5) <maximum p-value threshold for counting (2-sided binomial test)>\n(6) <name tag for output files>",call.=FALSE)
}
# Assign arguments to objects
burdenResFile <- args[1]
#
burdenShiftFile <- args[2]
#
catsFile <- args[3]
#
nPerms <- as.numeric(args[4])
#
maxP <- as.numeric(args[5])
if (maxP>1) {stop(paste("P-value threshold ",maxP," is greater than 1. Provide a p<1, e.g. 0.05."))}
#
if (length(args)==6) {
outtag <- args[6]
} else {
outtag <- "out"
}
#
rm(args)
# Load required packages
require(readr)
require(ggplot2)
# Function for counting categories
countCats <- function(pvals, pvalThresh) {
nCase <- length(pvals[pvals>0 & pvals<=pvalThresh])
nControl <- length(pvals[pvals<0 & abs(pvals)<=pvalThresh])
return(c("case"=nCase,"control"=nControl))
}
# Load observed results file (true data)
burdenRes <- read.delim(burdenResFile, sep="\t", header=T, stringsAsFactors=F)
# Load burden shift results file (permuted data)
burdenShift <- read_delim(burdenShiftFile, delim="\t", col_names=T, col_types=paste("c",paste(rep("d",times=nPerms),collapse=""),sep=""))
# Load file noting category sets to process
catsets <- read.delim(catsFile, sep="\t", header=T, stringsAsFactors=F)
# Initialize results objects
permCaseTab <- permControlTab <- c()
pdf(file=paste("plotDistr_p",maxP,"_",outtag,".pdf",sep=""), height=3, width=4)
# Loop through categories specified in the catsets object
for (i in 1:ncol(catsets)) {
# Define the name of the set, and the categories within it
if (i==1) {
setName <- "All"
testCats <- catsets$Annotation_combo
} else {
setName <- colnames(catsets)[i]
testCats <- catsets$Annotation_combo[catsets[,i]==1]
}
print(setName)
print(length(testCats))
#
# Subset true results to only the categories in the set
burdenResTrim <- data.frame(burdenRes[burdenRes$Annotation_combo %in% testCats,])
#
# Find case and control counts from this subset of categories
nObsCase <- nrow(burdenResTrim[burdenResTrim$Binom_p<=maxP & burdenResTrim$Adjusted_relative_risk>1,])
nObsControl <- nrow(burdenResTrim[burdenResTrim$Binom_p<=maxP & burdenResTrim$Adjusted_relative_risk<1,])
#
# Subset burden shift results to only the categories in the set
burdenShiftTrim <- data.frame(burdenShift[burdenShift$Annotation_combo %in% testCats,])
#
# Count up the number of case and control categories passing maxP
permCounts <- data.frame(t(apply(burdenShiftTrim[,2:ncol(burdenShiftTrim)], 2, countCats, pvalThresh=maxP)))
#
# Compare the observed counts to the permuted counts to calculate shift p-values
nPermCase <- nrow(permCounts[permCounts$case>=nObsCase,])
pCase <- nPermCase/nrow(permCounts)
nPermControl <- nrow(permCounts[permCounts$control>=nObsControl,])
pControl <- nPermControl/nrow(permCounts)
#
# Plot the results
ggpermCounts <- data.frame("N_signif_tests"=c(permCounts$case,permCounts$control))
myfontsize <- 10
larger <- max(c(nObsCase,nObsControl))
shiftDistPlot <- ggplot(ggpermCounts, aes(x=N_signif_tests)) +
geom_density(size=0.25, fill="gray92") +
geom_vline(mapping=aes(xintercept=nObsCase), size=0.25, colour="red", linetype=2) +
geom_vline(mapping=aes(xintercept=nObsControl), size=0.25, colour="blue", linetype=2) +
labs(x='Number of significant tests', y='Density', title=setName) +
theme_bw() +
theme(text=element_text(size=myfontsize), panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.title=element_text(size=myfontsize, face="bold"), plot.title=element_text(hjust=0.5,size=myfontsize, face="bold"), axis.text=element_text(size=myfontsize), strip.text=element_text(size=myfontsize)) +
annotate("text", x=nObsCase+(larger*0.02), y=Inf, vjust=4, hjust=0, label=pCase, colour="red") +
annotate("text", x=nObsControl+(larger*0.02), y=Inf, vjust=2, hjust=0, label=pControl, colour="blue")
print(shiftDistPlot)
#
# Add plot to figure document
#
# Add data to output object
permCaseTab <- data.frame(cbind(permCaseTab,permCounts$case))
permControlTab <- data.frame(cbind(permControlTab,permCounts$control))
colnames(permControlTab)[ncol(permControlTab)] <- colnames(permCaseTab)[ncol(permCaseTab)] <- setName
if (i==1) {
obsTab <- data.frame("Category_set"=setName,"N_cats_case"=nObsCase,"P_case"=pCase,"N_cats_control"=nObsControl,"P_control"=pControl, stringsAsFactors=F)
} else {
obsTab <- rbind(obsTab,c("Category_set"=setName,"N_cats_case"=nObsCase,"P_case"=pCase,"N_cats_control"=nObsControl,"P_control"=pControl))
}
}
dev.off()
# Write output data to file(s)
# File names
permCaseFile <- paste("nCatsCase_perm_p",maxP,"_",outtag,".txt",sep="")
permControlFile <- paste("nCatsControl_perm_p",maxP,"_",outtag,".txt",sep="")
obsFile <- paste("nCats_obs_p",maxP,"_",outtag,".txt",sep="")
#
# Write files
write.table(permCaseTab, file=permCaseFile, sep="\t", row.names=F, col.names=T, quote=F)
write.table(permControlTab, file=permControlFile, sep="\t", row.names=F, col.names=T, quote=F)
write.table(obsTab, file=obsFile, sep="\t", row.names=F, col.names=T, quote=F)