-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcheck_cutoff_effect.R
87 lines (76 loc) · 4.25 KB
/
check_cutoff_effect.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#
# Supp-step 2: Check normalizaiton cutoff's effect on p-values
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Purpose: for every candidate rarefaction cutoff (each library's read count),
# repeatedly rarefy the libraries that meet the cutoff, balance the design
# across treatment combinations, run a permutational ANOVA, and record the
# p-values of the tracked terms. The mean p-value per cutoff (+/- one standard
# deviation across replicates) is then drawn as a grouped bar plot.

# 'vegan' provides rrarefy(), decostand() and adonis(); load it once up front
# rather than on every loop iteration.
library(vegan)

# Row names of the adonis ANOVA table whose p-values are recorded. They must
# match the terms of the model formula passed to adonis() below exactly, which
# is why the data frame used in that formula has to be called 'normalizedData'.
treatments <- c(
  "normalizedData$Plants",
  "normalizedData$Plants:normalizedData$Water",
  "normalizedData$Season"
)
treatmentsLength <- length(treatments)

# 'order_counts.csv' is generated by 'compile_raw.R'
rawData <- read.csv('data/order_counts.csv', row.names = 1, stringsAsFactors = FALSE)

# Every library's read count is tried as a cutoff, smallest first
cutoffs <- sort(rawData$reads)
cutoffsLength <- length(cutoffs)
if (cutoffsLength == 0) {
  stop("no usable read counts found in the input data", call. = FALSE)
}
reps <- 100

# p-values default to 1 and keep that value for any cutoff/replicate that
# leaves too few libraries to analyse. 'dimnames' must be a list (one entry
# per dimension); only the treatment dimension carries labels.
pVals <- array(
  data = 1,
  dim = c(treatmentsLength, cutoffsLength, reps),
  dimnames = list(Treatment = treatments, "Read Count" = NULL, Replicate = NULL)
)

# Columns 1-6 hold the per-library metadata (including 'reads'); columns 3-5
# of those identify the treatment combination. The remaining columns are the
# per-order counts.
metaColumns <- 1:6
trtColumns <- 3:5

# Build one "a b c" key per row from the treatment columns. The same function
# is used for the reference combinations and for the libraries, so the keys
# are guaranteed to be comparable.
trtKey <- function(trtValues) {
  do.call(paste, c(unname(as.list(trtValues)), sep = " "))
}

# The set of treatment combinations depends only on the raw data, so it is
# computed once instead of inside the loops.
trtCombos <- unique(rawData[, trtColumns])
trtNames <- trtKey(trtCombos)

for (repIndex in seq_len(reps)) {
  for (cutoffIndex in seq_len(cutoffsLength)) {
    minReads <- cutoffs[cutoffIndex]
    retained <- subset(rawData, reads >= minReads)
    rawTreatments <- retained[, metaColumns]
    rawReads <- retained[, -metaColumns]

    # Rarefaction (provided by 'vegan') selects a random subsample,
    # n = 'minReads', of the raw data from each library
    normalizedData <- cbind(rawTreatments, rrarefy(rawReads, minReads))
    normalizedData$reads <- minReads

    # Randomly remove libraries until there is an equal number for all
    # treatment combinations
    #>>>START
    # Total number of libraries in each treatment combination (combinations
    # with no surviving library count as 0)
    libraryTrt <- factor(
      trtKey(normalizedData[, trtColumns, drop = FALSE]),
      levels = trtNames
    )
    trtTotals <- as.vector(table(libraryTrt))
    # The smallest group size is the size every group is trimmed down to
    minCount <- min(trtTotals)
    trtCounts <- cbind("Total" = trtTotals, "n to remove" = trtTotals - minCount)
    rownames(trtCounts) <- trtNames

    # For each combination, pick the surplus libraries at random. Libraries
    # are matched on the full combination key, never on individual values.
    librariesByTrt <- split(rownames(normalizedData), libraryTrt)
    librariesChosenForRemoval <- unlist(
      lapply(librariesByTrt, function(candidates) {
        surplus <- length(candidates) - minCount
        if (surplus > 0) sample(candidates, surplus) else character(0)
      }),
      use.names = FALSE
    )

    # Perform the removal with a logical mask: negative indexing with an empty
    # index vector would drop every row when nothing has to be removed.
    keepLibrary <- !(rownames(normalizedData) %in% librariesChosenForRemoval)
    normalizedData <- normalizedData[keepLibrary, ]
    #>>>FINISH

    if (nrow(normalizedData) > 1) {
      # 'decostand' in the package 'vegan' standardizes community data (counts
      # of orders per sample). The method 'total' divides each order's count
      # by the sample total, i.e. each order's datum becomes the relative
      # abundance of that order in the sample.
      decostandData <- decostand(normalizedData[, -metaColumns], "total")

      # 'adonis' in the package 'vegan' performs a permutational analysis of
      # variance. In short, it shuffles community data between treatments
      # 9999 times. For each shuffle it quantifies, proportionally, how much
      # of the variance in dissimilarity is explained by the treatment terms.
      # The proportion of shuffles for which the treatments explained at least
      # as much variance as in the unshuffled data is the p-value: the
      # probability of obtaining our observed results under the null
      # hypothesis that treatment doesn't affect dissimilarity.
      # NOTE(review): recent vegan releases deprecate adonis() in favour of
      # adonis2(), whose result is structured differently -- confirm the
      # installed vegan version still provides adonis().
      anova <- adonis(
        decostandData ~ normalizedData$Plants * normalizedData$Water * normalizedData$Season,
        permutations = 9999
      )
      aov.tab <- anova$aov.tab
      # Terms missing from the table yield NA, exactly as per-term lookup would
      pVals[, cutoffIndex, repIndex] <- aov.tab[treatments, "Pr(>F)"]
    }
  }
}

# Summarize across replicates: one mean and one standard deviation per
# treatment term and cutoff
means <- apply(pVals, MARGIN = c(1, 2), FUN = mean)
stdDevs <- apply(pVals, MARGIN = c(1, 2), FUN = sd)

# Only the lowest cutoffs are plotted (at most 38, fewer if the data set is
# smaller). NOTE(review): presumably 38 was chosen by inspecting where results
# stop being informative for the original data set -- confirm.
maxCutoffsShown <- 38
shownCutoffs <- seq_len(min(maxCutoffsShown, cutoffsLength))
significantMeans <- means[, shownCutoffs, drop = FALSE]
significantStdDevs <- stdDevs[, shownCutoffs, drop = FALSE]

# Grouped bars (one colour per treatment term) with +/- 1 SD whiskers
brplt <- barplot(significantMeans, col = rainbow(treatmentsLength), beside = TRUE)
segments(
  brplt, significantMeans - significantStdDevs,
  brplt, significantMeans + significantStdDevs,
  col = "black"
)

# Quit R, saving the workspace so the results can be inspected later
q(save = "yes")