-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
129518e
commit 41e9e0f
Showing
1 changed file
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#1. Quality trimming of raw fastq data | ||
|
||
trim_galore --paired --quality 20 --length 20 *.fastq.gz -o ./Filtered/ -j 10 --no_report_file | ||
|
||
#2. Amplicon sequence data analysis using DADA2 | ||
|
||
setwd("/media/dibakar/New Volume1/MDPD_V2/IPF_DADA2/PRJNA357063/Filtered/") | ||
library(dada2) | ||
fnFs <- sort(list.files(pattern="_1.fastq.gz", full.names = TRUE)) | ||
fnRs <- sort(list.files(pattern="_2.fastq.gz", full.names = TRUE)) | ||
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1) | ||
filtFs <- file.path("filtered", paste0(sample.names, "_F_filt.fastq.gz")) | ||
filtRs <- file.path("filtered", paste0(sample.names, "_R_filt.fastq.gz")) | ||
names(filtFs) <- sample.names | ||
names(filtRs) <- sample.names | ||
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE, | ||
compress=TRUE, multithread=TRUE) | ||
head(out) | ||
errF <- learnErrors(filtFs, multithread=TRUE) | ||
errR <- learnErrors(filtRs, multithread=TRUE) | ||
dadaFs <- dada(filtFs, err=errF, multithread=TRUE) | ||
dadaRs <- dada(filtRs, err=errR, multithread=TRUE) | ||
dadaFs[[1]] | ||
mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE) | ||
# head(mergers[[1]]) | ||
seqtab <- makeSequenceTable(mergers) | ||
# dim(seqtab) | ||
# table(nchar(getSequences(seqtab))) | ||
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) | ||
# dim(seqtab.nochim) | ||
# sum(seqtab.nochim)/sum(seqtab) | ||
getN <- function(x) sum(getUniques(x)) | ||
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim)) | ||
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") | ||
rownames(track) <- sample.names | ||
# head(track) | ||
taxa <- assignTaxonomy(seqtab.nochim, "silva_nr99_v138.1_train_set.fa.gz", multithread=TRUE) | ||
taxa.print <- taxa | ||
rownames(taxa.print) <- NULL | ||
# head(taxa.print) | ||
library(phyloseq) | ||
metadata = read.csv("PRJNA357063_metadta.csv", row.names = 1, header = TRUE) | ||
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), sample_data(metadata), tax_table(taxa)) | ||
ps | ||
dna <- Biostrings::DNAStringSet(taxa_names(ps)) | ||
names(dna) <- taxa_names(ps) | ||
ps <- merge_phyloseq(ps, dna) | ||
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps))) | ||
asv_seqs <- colnames(seqtab.nochim) | ||
asv_headers <- vector(dim(seqtab.nochim)[2], mode="character") | ||
for (i in 1:dim(seqtab.nochim)[2]) { | ||
asv_headers[i] <- paste(">ASV", i, sep="_") | ||
} | ||
Asv_fasta <- c(rbind(asv_headers, asv_seqs)) | ||
write(Asv_fasta, "ASVs.fa") | ||
asv_tab <- t(seqtab.nochim) | ||
row.names(asv_tab) <- sub(">", "", asv_headers) | ||
write.table(asv_tab, "ASVs_counts.tsv", sep="\t", quote=F, col.names=NA) | ||
|
||
#Krona Plot | ||
library(psadd) | ||
plot_krona(ps, "PRJNA357063", "SubGroup", trim = T) | ||
|
||
#Downstream analysis | ||
library(microeco) | ||
library(file2meco) | ||
|
||
#Phyloseq to meco obejct | ||
meco_object <- phyloseq2meco(ps) | ||
meco_object$tidy_dataset() | ||
|
||
#Filter pollution | ||
meco_object$filter_pollution(taxa = c("mitochondria", "chloroplast")) | ||
meco_object$tidy_dataset() | ||
#Filter based on abundance and detection | ||
meco_object$filter_taxa(rel_abund = 0.0001, freq = 0.05) | ||
meco_object$tidy_dataset() | ||
|
||
#Box plot Phylum | ||
t1 <- trans_abund$new(dataset = meco_object, taxrank = "Phylum", ntaxa = 10) | ||
t1$plot_box(group = "Group", xtext_angle = 30) | ||
write.csv(t1$data_abund, file = "Phylum_box.csv") | ||
#Bar plot Genus | ||
t2 <- trans_abund$new(dataset = meco_object, groupmean = "SubGroup", taxrank = "Genus")face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12)) | ||
g2 <- t2$plot_bar(others_color = "grey70", bar_full = FALSE, legend_text_italic = TRUE) + theme(axis.text.y = element_text(size = 12, vjust = 0.5, face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12)) | ||
g2 | ||
write.csv(g2$data, file = "Genus_Bar.csv") | ||
|
||
|
||
#Heatmap run wise | ||
t3 <- trans_abund$new(dataset = meco_object, taxrank = "Genus", ntaxa = 30) | ||
g3 <- t3$plot_heatmap(facet = "Group", xtext_keep = FALSE, withmargin = FALSE, plot_breaks = c(0.01, 0.1, 1, 10)) | ||
g3 | ||
g3 + theme(axis.text.y = element_text(face = 'italic'))st_method = "none") | ||
write.csv(g3$data, file = "Heatmap_run.csv") | ||
|
||
#Differential | ||
#Lefse | ||
t4 <- trans_diff$new(dataset = meco_object, method = "lefse", group = "SubGroup", alpha = 0.05, lefse_subgroup = NULL, taxa_level = "Genus", p_adjust_method = "none") | ||
# see t1$res_diff for the result | ||
# From v0.8.0, threshold is used for the LDA score selection. | ||
t4$plot_diff_bar(threshold = 2) | ||
# we show 20 taxa with the highest LDA (log10) | ||
t4$plot_diff_bar(use_number = 1:30, width = 0.8) | ||
write.csv(t4$res_diff, file = "LDA_Bar.csv") | ||
|
||
library(Maaslin2) | ||
# input_data <- as.data.frame(t(read.delim(system.file('extdata','HMP2_taxonomy.tsv', package="Maaslin2"), row.names = 1))) | ||
# input_metadata <- read.delim(system.file('extdata','HMP2_metadata.tsv', package="Maaslin2"), row.names = 1) | ||
# library(microeco) | ||
# d1 <- microtable$new(input_data, sample_table = input_metadata) | ||
meco_object$taxa_abund$Genus <- meco_object$otu_table | ||
t5 <- trans_diff$new(dataset = meco_object, method = "maaslin2", | ||
fixed_effects = c('SubGroup', ''), | ||
random_effects = 'NONE', | ||
normalization = 'NONE', | ||
reference = 'NONE', | ||
standardize = FALSE, | ||
taxa_level = "Genus", filter_thres = 0.001, | ||
tmp_input_maaslin2 = "maaslin2_tmp_input3", tmp_output_maaslin2 = "maaslin2_tmp_output3") | ||
View(t5$res_diff) | ||
# use name column as x axis; delete the Env column. it is a minor bug in v1.5.0. Fixed in the v1.6.0 | ||
t5$res_diff <- t5$res_diff[, -3] | ||
t5$plot_diff_bar(heatmap_cell = "coef", heatmap_lab_fill = "Coef", heatmap_x = "name", heatmap_y = "Taxa", keep_prefix = TRUE, add_sig = FALSE) | ||
write.csv(t5$res_diff, file = "Maaslin2_Heatmap.csv") |