@@ -40,6 +40,8 @@ OTUs <- do.call(rbind, OTU.all)
40
40
rownames(OTUs ) <- OTUs $ OTUID
41
41
OTUs $ OTUID <- NULL
42
42
43
+ sum(OTUs )
44
+
43
45
devSVG(" figures/Hyena_OTU_heat.svg" , width = 14 , height = 14 )
44
46
pheatmap(log10(OTUs + 1 ),
45
47
show_rownames = FALSE ,
@@ -67,7 +69,39 @@ pheatmap(log10(OTUs+1),
67
69
dev.off()
68
70
69
71
72
+ # # a crude background reduction by setting all counts below an outlier detection to zero
73
+ # # http://stats.stackexchange.com/questions/56402/detecting-outliers-in-count-data
74
## Flag likely background counts in one row of the OTU table.
##
## x:    counts for a single OTU (numeric vector or one-row data frame).
## prob: quantile of the robust z-scores at or below which a count is
##       flagged; defaults to the previously hard-coded 0.05.
## Returns the integer positions in x that should be set to zero
## (integer(0) when no call can be made).
##
## Method: http://stats.stackexchange.com/questions/56402/detecting-outliers-in-count-data
out.z <- function(x, prob = 0.05) {
  trans <- log10(as.numeric(x))
  ## only counts above 1 (log10 > 0) enter the distribution; zeros and
  ## ones are assumed here to be true negatives
  NN <- which(trans > 0)
  if (length(NN) == 0) {
    return(integer(0))
  }
  spread <- mad(trans[NN])
  ## A MAD of 0 (e.g. most retained counts identical) turns the z-scores
  ## into NaN/Inf and the cutoff into Inf, which used to flag the largest
  ## counts of the row. Without a usable spread nothing is flagged.
  if (is.na(spread) || spread == 0) {
    return(integer(0))
  }
  rob.z <- (trans - median(trans[NN])) / spread
  cutoff <- quantile(rob.z[NN], prob, na.rm = TRUE, names = FALSE)
  ## zeros have a z-score of -Inf, so they are always part of the result
  which(!rob.z > cutoff)
}
82
+
83
## Zero, row by row, every count that out.z() flags as background.
## seq_len() avoids iterating over c(1, 0) when OTUs has no rows.
for (i in seq_len(nrow(OTUs))) {
  OTUs[i, out.z(OTUs[i, ])] <- 0
}
86
+
87
+ sum(OTUs )
88
+
89
+ # # 90,427 removed 06/09/2016
90
+
91
+ png(" figures/Hyena_OTU_heat_BCcor.png" , res = 300 , width = 1480 , height = 1480 )
92
+ pheatmap(log10(OTUs + 1 ),
93
+ show_rownames = FALSE ,
94
+ show_colnames = TRUE ,
95
+ treeheight_row = 0 ,
96
+ treeheight_col = 0 ,
97
+ annotation_col = data.frame (row.names = colnames(OTUs ),
98
+ is.control = as.numeric(
99
+ grepl(" H2O|Argave|Wolf|Paramix" ,
100
+ colnames(OTUs )))),
101
+ annotation_legend = FALSE )
102
+ dev.off()
70
103
104
+ sum(OTUs )
71
105
72
106
amplicon <- gsub(" OTU\\ d+\\ |" , " " , rownames(OTUs ))
73
107
@@ -91,8 +125,9 @@ pheatmap(log10(t(SUM.amp)+1),
91
125
annotation_legend = FALSE )
92
126
dev.off()
93
127
94
- TAX.raw <- read.csv(" /SAN/Metabarcoding/Hyena/second/sorted_amps/usearch/ALL_outs.taxtable" ,
95
- sep = " ," )
128
+ TAX.raw <-
129
+ read.csv(" /SAN/Metabarcoding/Hyena/second/sorted_amps/usearch/ALL_outs_nt.taxtable" ,
130
+ sep = " ," )
96
131
97
132
TAX.raw $ query <- gsub(" .fastq.otus.fa" , " " , TAX.raw $ query )
98
133
@@ -104,31 +139,39 @@ T.l <- by(TAX.raw, TAX.raw$query, function (x) {
104
139
all.best <- x [x $ bitscore == b.bit , ]
105
140
# ## A little last common ancestor play here... BUT wait ...
106
141
# # a shortcut throwing out OTUs that don't agree at least on the
107
- # # family level and allowing only the best hit afterwards
108
- u.family <- unique(all.best $ family )
142
+ # # class level and allowing only the best hit afterwards
143
+ u.family <- unique(all.best $ class )
109
144
if (length(u.family )== 1 ){
110
145
return (all.best )
111
146
}
112
147
})
113
148
114
149
115
150
TAX <- do.call(rbind , T .l )
116
- head (TAX [order(TAX $ amplicon ), ])
151
+ tail (TAX [order(TAX $ amplicon ), ])
117
152
rownames(TAX ) <- NULL
118
153
154
+
155
+
119
156
# # Only consider Euks now
120
157
TAX <- TAX [TAX $ superkingdom %in% " Eukaryota" , ]
121
158
122
159
# # remove some really weird stuff FIND later out where the errors
123
160
# # are!! Database errors...
124
161
table(TAX $ phylum )
125
162
163
+ # # should be fixed in database at some point... but now as a shortcut
164
+ # # here
126
165
TAX <- TAX [! TAX $ phylum %in% c(" Cnidaria" , " Porifera" ,
127
166
" Bacillariophyta" , # # maybe okay?
128
167
" Eustigmatophyceae" # # maybe okay?
129
168
) ,]
130
169
131
- table(TAX $ phylum )
170
+ # # ## now use only best hit
171
+ # # TAX <- TAX[!duplicated(TAX$query), ]
172
+ # # tail(TAX[order(TAX$amplicon), ])
173
+
174
+ # # table(TAX$phylum)
132
175
133
176
# ## Summarizing by class
134
177
foo <- merge(TAX , OTUs , by.x = " query" , by.y = 0 )
@@ -137,18 +180,22 @@ foobar <- foobar[order(rowSums(foobar), decreasing=TRUE), ]
137
180
foobar <- foobar [! rownames(foobar )%in% c(" " , " undef" ), ]
138
181
foobar <- foobar [, ! grepl(" H2O|Argave|Wolf" , colnames(foobar ))]
139
182
140
- foobar <- foobar [rowSums(foobar )> 116 , ]
141
-
142
183
## Average replicate columns of a count table.
##
## x: matrix or data frame whose column names carry a replicate suffix
##    "_S<digits>"; columns that share the name left after stripping that
##    suffix are averaged together.
##    NOTE(review): assumes names look like <sample>_S<number> -- confirm.
## Returns a matrix with one row per row of x and one column per group,
## holding the mean over the columns of that group.
mean.columns <- function(x) {
  ## the pattern must contain no stray whitespace, otherwise no suffix is
  ## stripped and every column ends up in a group of its own
  reps <- as.factor(gsub("_S\\d+$", "", colnames(x)))
  ## by() works on rows, hence the transpose: each group of replicate rows
  ## of t(x) is reduced with colMeans and the groups are stacked again
  y <- do.call(rbind, by(t(x), reps, colMeans))
  t(y)
}
147
188
189
+ # # mean between replicates
148
190
baz <- mean.columns(foobar )
191
+ # # removing stuff with very low support from only one replicate
149
192
baz [baz < 1 ] <- 0
150
193
151
- devSVG(" figures/Hyena_class_sum_heat.svg" , width = 7 , height = 7 )
194
+ # # remove lowly represented classes
195
## drop = FALSE keeps baz a matrix even when only one class passes the
## threshold, so the heatmap call below still receives a 2-d object
baz <- baz[rowSums(baz) > 20, , drop = FALSE]
196
+
197
+
198
+ devSVG(" figures/Hyena_class_sumNT_heat.svg" , width = 7 , height = 7 )
152
199
pheatmap(log10(baz + 1 ),
153
200
show_rownames = TRUE ,
154
201
show_colnames = FALSE ,
0 commit comments