Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
RoyDibakar authored Oct 21, 2024
1 parent 129518e commit 41e9e0f
Showing 1 changed file with 125 additions and 0 deletions.
125 changes: 125 additions & 0 deletions R/New_codes_MDPD.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#1. Quality trimming of raw fastq data

trim_galore --paired --quality 20 --length 20 *.fastq.gz -o ./Filtered/ -j 10 --no_report_file

#2. Amplicon sequence data analysis using DADA2

setwd("/media/dibakar/New Volume1/MDPD_V2/IPF_DADA2/PRJNA357063/Filtered/")
library(dada2)
fnFs <- sort(list.files(pattern="_1.fastq.gz", full.names = TRUE))
fnRs <- sort(list.files(pattern="_2.fastq.gz", full.names = TRUE))
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
filtFs <- file.path("filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path("filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE,
compress=TRUE, multithread=TRUE)
head(out)
errF <- learnErrors(filtFs, multithread=TRUE)
errR <- learnErrors(filtRs, multithread=TRUE)
dadaFs <- dada(filtFs, err=errF, multithread=TRUE)
dadaRs <- dada(filtRs, err=errR, multithread=TRUE)
dadaFs[[1]]
mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE)
# head(mergers[[1]])
seqtab <- makeSequenceTable(mergers)
# dim(seqtab)
# table(nchar(getSequences(seqtab)))
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE)
# dim(seqtab.nochim)
# sum(seqtab.nochim)/sum(seqtab)
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
# head(track)
taxa <- assignTaxonomy(seqtab.nochim, "silva_nr99_v138.1_train_set.fa.gz", multithread=TRUE)
taxa.print <- taxa
rownames(taxa.print) <- NULL
# head(taxa.print)
library(phyloseq)
metadata = read.csv("PRJNA357063_metadta.csv", row.names = 1, header = TRUE)
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), sample_data(metadata), tax_table(taxa))
ps
dna <- Biostrings::DNAStringSet(taxa_names(ps))
names(dna) <- taxa_names(ps)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps)))
asv_seqs <- colnames(seqtab.nochim)
asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")
for (i in 1:dim(seqtab.nochim)[2]) {
asv_headers[i] <- paste(">ASV", i, sep="_")
}
Asv_fasta <- c(rbind(asv_headers, asv_seqs))
write(Asv_fasta, "ASVs.fa")
asv_tab <- t(seqtab.nochim)
row.names(asv_tab) <- sub(">", "", asv_headers)
write.table(asv_tab, "ASVs_counts.tsv", sep="\t", quote=F, col.names=NA)

#Krona Plot
library(psadd)
plot_krona(ps, "PRJNA357063", "SubGroup", trim = T)

#Downstream analysis
library(microeco)
library(file2meco)

#Phyloseq to meco obejct
meco_object <- phyloseq2meco(ps)
meco_object$tidy_dataset()

#Filter pollution
meco_object$filter_pollution(taxa = c("mitochondria", "chloroplast"))
meco_object$tidy_dataset()
#Filter based on abundance and detection
meco_object$filter_taxa(rel_abund = 0.0001, freq = 0.05)
meco_object$tidy_dataset()

#Box plot Phylum
t1 <- trans_abund$new(dataset = meco_object, taxrank = "Phylum", ntaxa = 10)
t1$plot_box(group = "Group", xtext_angle = 30)
write.csv(t1$data_abund, file = "Phylum_box.csv")
#Bar plot Genus
t2 <- trans_abund$new(dataset = meco_object, groupmean = "SubGroup", taxrank = "Genus")face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12))
g2 <- t2$plot_bar(others_color = "grey70", bar_full = FALSE, legend_text_italic = TRUE) + theme(axis.text.y = element_text(size = 12, vjust = 0.5, face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12))
g2
write.csv(g2$data, file = "Genus_Bar.csv")


#Heatmap run wise
t3 <- trans_abund$new(dataset = meco_object, taxrank = "Genus", ntaxa = 30)
g3 <- t3$plot_heatmap(facet = "Group", xtext_keep = FALSE, withmargin = FALSE, plot_breaks = c(0.01, 0.1, 1, 10))
g3
g3 + theme(axis.text.y = element_text(face = 'italic'))st_method = "none")
write.csv(g3$data, file = "Heatmap_run.csv")

#Differential
#Lefse
t4 <- trans_diff$new(dataset = meco_object, method = "lefse", group = "SubGroup", alpha = 0.05, lefse_subgroup = NULL, taxa_level = "Genus", p_adjust_method = "none")
# see t1$res_diff for the result
# From v0.8.0, threshold is used for the LDA score selection.
t4$plot_diff_bar(threshold = 2)
# we show 20 taxa with the highest LDA (log10)
t4$plot_diff_bar(use_number = 1:30, width = 0.8)
write.csv(t4$res_diff, file = "LDA_Bar.csv")

library(Maaslin2)
# input_data <- as.data.frame(t(read.delim(system.file('extdata','HMP2_taxonomy.tsv', package="Maaslin2"), row.names = 1)))
# input_metadata <- read.delim(system.file('extdata','HMP2_metadata.tsv', package="Maaslin2"), row.names = 1)
# library(microeco)
# d1 <- microtable$new(input_data, sample_table = input_metadata)
meco_object$taxa_abund$Genus <- meco_object$otu_table
t5 <- trans_diff$new(dataset = meco_object, method = "maaslin2",
fixed_effects = c('SubGroup', ''),
random_effects = 'NONE',
normalization = 'NONE',
reference = 'NONE',
standardize = FALSE,
taxa_level = "Genus", filter_thres = 0.001,
tmp_input_maaslin2 = "maaslin2_tmp_input3", tmp_output_maaslin2 = "maaslin2_tmp_output3")
View(t5$res_diff)
# use name column as x axis; delete the Env column. it is a minor bug in v1.5.0. Fixed in the v1.6.0
t5$res_diff <- t5$res_diff[, -3]
t5$plot_diff_bar(heatmap_cell = "coef", heatmap_lab_fill = "Coef", heatmap_x = "name", heatmap_y = "Taxa", keep_prefix = TRUE, add_sig = FALSE)
write.csv(t5$res_diff, file = "Maaslin2_Heatmap.csv")

0 comments on commit 41e9e0f

Please sign in to comment.