-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpful-scripts-from-adina.R
108 lines (91 loc) · 4.7 KB
/
helpful-scripts-from-adina.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#devtools::install_github("sjackman/uniqtag")
library(plyr)
library(uniqtag)
library(ggplot2)
setwd("~/Box Sync/biomark")
raw_data_file = "A1.1 Raw Data.csv"
raw_data <- read.csv(raw_data_file, sep=",", row.names=1, header = TRUE)
dim(raw_data)
head(raw_data)
raw_data$full_assay <- paste(raw_data$Name,"-", raw_data$Name.1, sep="")
raw_data$unique_id <- make_unique(raw_data$full_assay)
# Metadata fie has to have a header names "sample_type" where you specify standards or env sample
# You should have metadata for both samples and standards
# It would be better if the "standard sample" had the appropriate dilution
meta_data_file = "metadata.csv"
meta <- read.csv(meta_data_file, header=TRUE)
# Check the NTCs
ct_threshold_ntc = 20 #Should be set by you
ntc <- subset(raw_data, Name == "NTC")
summary(ntc$Value)
check_ntc <- subset(ntc, Value > 20 & Value < 999)
check_ntc$Value
data_nontc <- subset(raw_data, Name != "NTC")
merged <- merge(data_nontc, meta, by = "Name")
#Standards Workflow
standards <- subset(merged, sample_type == "standard")
gene_list <- unique(standards$Name)
for (x in 1:length(gene_list)) {
each_gene <- subset(standards, Name == gene_list[x])
standard_only = paste(gene_list[x], '-', gene_list[x], sep='')
each_gene_filtered = subset(each_gene, full_assay == standard_only)
#print(each_gene_filtered)
each_gene_badfiltered = subset(each_gene, full_assay != standard_only)
each_gene_badfiltered = subset(each_gene_badfiltered, Value < 999) # This can be used to filter
#print(each_gene_badfiltered)
f <- ddply(each_gene_filtered, .(rConc, full_assay), summarise, MEAN = mean(Value), SE=sd(Value)/sqrt(length(Value)))
limits<-aes(ymin=MEAN-SE, ymax=MEAN+SE)
f$log <- log10(f$rConc)
p = ggplot(f, aes_string(x="log", y="MEAN", color="full_assay"))
p = p + geom_point() + geom_errorbar(position=position_dodge(), limits)+theme(axis.text.x = element_text(angle = 90))
p
ggsave(paste0(x,'standard.pdf'), device="pdf")
f <- ddply(each_gene_badfiltered, .(rConc, full_assay), summarise, MEAN = mean(Value), SE=sd(Value)/sqrt(length(Value)))
limits<-aes(ymin=MEAN-SE, ymax=MEAN+SE)
f$log <- log10(f$rConc)
p = ggplot(f, aes_string(x="log", y="MEAN", color="full_assay"))
p = p + geom_point() + geom_errorbar(position=position_dodge(), limits)+theme(axis.text.x = element_text(angle = 90))
p
ggsave(paste0(x,'nonstandard.pdf'), device="pdf")
}
#Samples Workflow
samples <- subset(merged, sample_type == "env") #all samples
#average across technical replicates
f <- ddply(samples, .(full_assay, location, plot, time, matrix, Name, Name.1), summarise, MEAN = mean(Value), SE=sd(Value)/sqrt(length(Value)))
#genes that are detected #stats are done prior to removing bad measurements
f_detects <- subset(f, MEAN < 999)
print("Number of Genes with detects: ")
length(unique(f_detects$Name.1))
unique(f_detects$Name.1)
#Analysis likely useful by gene or by sample
gene_list <- unique(f$Name.1)
for (x in 1:length(gene_list)) {
each_gene <- subset(f, Name.1 == gene_list[x])
limits<-aes(ymin=MEAN-SE, ymax=MEAN+SE)
f2 <- subset(each_gene, MEAN < 25) #Can set as appropriate - filter by MEAN technical rep Ct
p = ggplot(f2, aes_string(x="full_assay", y="MEAN", color="matrix"))
p = p + geom_point() + geom_errorbar(position=position_dodge(), limits)+theme(axis.text.x = element_text(angle = 90))
p
write.csv(file=paste0(x,".sample.csv"), each_gene, quote=FALSE, row.names=FALSE)
ggsave(paste0(x,'sample.pdf'), device="pdf")
}
for (x in 1:length(gene_list)) {
each_gene <- subset(f, Name.1 == gene_list[x])
#Note this is going to take the average of the averages, not sure if this is to be done
f_plot <-ddply(each_gene, .(plot, Name.1), summarise, MEAN2 = mean(MEAN), SE=sd(MEAN)/sqrt(length(MEAN)))
f2 <- subset(f_plot, MEAN2 < 25) #Can set as appropriate - filter by MEAN technical rep Ct
limits<-aes(ymin=MEAN2-SE, ymax=MEAN2+SE)
p = ggplot(f2, aes_string(x="plot", y="MEAN2", color="Name.1"))
p = p + geom_point() + geom_errorbar(position=position_dodge(), limits)+theme(axis.text.x = element_text(angle = 90))
p
write.csv(file=paste0(x,".byplot.csv"), each_gene, quote=FALSE, row.names=FALSE)
ggsave(paste0(x,'byplot.pdf'), device="pdf")
}
each_gene <- subset(f, Name.1 == gene_list[x])
#EVERYTHING!
#Note this is going to take the average of the averages, not sure if this is to be done
f_plot <-ddply(f, .(plot, Name.1), summarise, MEAN2 = mean(MEAN), SE2=sd(MEAN)/sqrt(length(MEAN)))
f2 <- subset(f_plot, MEAN2 < 25) #Can set as appropriate - filter by MEAN technical rep Ct
limits2<-aes(ymin=MEAN2-SE2, ymax=MEAN2+SE2)
p = ggplot(f2, aes_string(x="plot", y="MEAN2", color="Name.1"))
p + geom_point() + geom_errorbar(width=0.25, limits2)+theme(axis.text.x = element_text(angle = 90))