From 41e9e0fa9005fa975df0db099cca66f2b7c389e4 Mon Sep 17 00:00:00 2001 From: Dibakar Roy <137523251+RoyDibakar@users.noreply.github.com> Date: Mon, 21 Oct 2024 20:31:47 +0530 Subject: [PATCH] Add files via upload --- R/New_codes_MDPD.txt | 125 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 R/New_codes_MDPD.txt diff --git a/R/New_codes_MDPD.txt b/R/New_codes_MDPD.txt new file mode 100644 index 0000000..0f07dd0 --- /dev/null +++ b/R/New_codes_MDPD.txt @@ -0,0 +1,125 @@ +#1. Quality trimming of raw fastq data + +trim_galore --paired --quality 20 --length 20 *.fastq.gz -o ./Filtered/ -j 10 --no_report_file + +#2. Amplicon sequence data analysis using DADA2 + +setwd("/media/dibakar/New Volume1/MDPD_V2/IPF_DADA2/PRJNA357063/Filtered/") +library(dada2) +fnFs <- sort(list.files(pattern="_1.fastq.gz", full.names = TRUE)) +fnRs <- sort(list.files(pattern="_2.fastq.gz", full.names = TRUE)) +sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1) +filtFs <- file.path("filtered", paste0(sample.names, "_F_filt.fastq.gz")) +filtRs <- file.path("filtered", paste0(sample.names, "_R_filt.fastq.gz")) +names(filtFs) <- sample.names +names(filtRs) <- sample.names +out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=TRUE, + compress=TRUE, multithread=TRUE) +head(out) +errF <- learnErrors(filtFs, multithread=TRUE) +errR <- learnErrors(filtRs, multithread=TRUE) +dadaFs <- dada(filtFs, err=errF, multithread=TRUE) +dadaRs <- dada(filtRs, err=errR, multithread=TRUE) +dadaFs[[1]] +mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE) +# head(mergers[[1]]) +seqtab <- makeSequenceTable(mergers) +# dim(seqtab) +# table(nchar(getSequences(seqtab))) +seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) +# dim(seqtab.nochim) +# sum(seqtab.nochim)/sum(seqtab) +getN <- function(x) sum(getUniques(x)) +track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim)) +colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") +rownames(track) <- sample.names +# head(track) +taxa <- assignTaxonomy(seqtab.nochim, "silva_nr99_v138.1_train_set.fa.gz", multithread=TRUE) +taxa.print <- taxa +rownames(taxa.print) <- NULL +# head(taxa.print) +library(phyloseq) +metadata = read.csv("PRJNA357063_metadta.csv", row.names = 1, header = TRUE) +ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), sample_data(metadata), tax_table(taxa)) +ps +dna <- Biostrings::DNAStringSet(taxa_names(ps)) +names(dna) <- taxa_names(ps) +ps <- merge_phyloseq(ps, dna) +taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps))) +asv_seqs <- colnames(seqtab.nochim) +asv_headers <- vector(dim(seqtab.nochim)[2], mode="character") +for (i in 1:dim(seqtab.nochim)[2]) { + asv_headers[i] <- paste(">ASV", i, sep="_") +} +Asv_fasta <- c(rbind(asv_headers, asv_seqs)) +write(Asv_fasta, "ASVs.fa") +asv_tab <- t(seqtab.nochim) +row.names(asv_tab) <- sub(">", "", asv_headers) +write.table(asv_tab, "ASVs_counts.tsv", sep="\t", quote=F, col.names=NA) + +#Krona Plot +library(psadd) +plot_krona(ps, "PRJNA357063", "SubGroup", trim = T) + +#Downstream analysis +library(microeco) +library(file2meco) + +#Phyloseq to meco obejct +meco_object <- phyloseq2meco(ps) +meco_object$tidy_dataset() + +#Filter pollution +meco_object$filter_pollution(taxa = c("mitochondria", "chloroplast")) +meco_object$tidy_dataset() +#Filter based on abundance and detection +meco_object$filter_taxa(rel_abund = 0.0001, freq = 0.05) +meco_object$tidy_dataset() + +#Box plot Phylum +t1 <- trans_abund$new(dataset = meco_object, taxrank = "Phylum", ntaxa = 10) +t1$plot_box(group = "Group", xtext_angle = 30) +write.csv(t1$data_abund, file = "Phylum_box.csv") +#Bar plot Genus +t2 <- trans_abund$new(dataset = meco_object, groupmean = "SubGroup", taxrank = "Genus")face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12)) +g2 <- t2$plot_bar(others_color = "grey70", bar_full = FALSE, legend_text_italic = TRUE) + theme(axis.text.y = element_text(size = 12, vjust = 0.5, face = "bold"), axis.text.x = element_text(size = 16, face = "bold"),panel.background = element_blank(), axis.title = element_text(size = 12, face = "bold"), axis.line = element_line(colour = "black"), legend.text = element_text(size = 12)) +g2 +write.csv(g2$data, file = "Genus_Bar.csv") + + +#Heatmap run wise +t3 <- trans_abund$new(dataset = meco_object, taxrank = "Genus", ntaxa = 30) +g3 <- t3$plot_heatmap(facet = "Group", xtext_keep = FALSE, withmargin = FALSE, plot_breaks = c(0.01, 0.1, 1, 10)) +g3 +g3 + theme(axis.text.y = element_text(face = 'italic'))st_method = "none") +write.csv(g3$data, file = "Heatmap_run.csv") + +#Differential +#Lefse +t4 <- trans_diff$new(dataset = meco_object, method = "lefse", group = "SubGroup", alpha = 0.05, lefse_subgroup = NULL, taxa_level = "Genus", p_adjust_method = "none") +# see t1$res_diff for the result +# From v0.8.0, threshold is used for the LDA score selection. +t4$plot_diff_bar(threshold = 2) +# we show 20 taxa with the highest LDA (log10) +t4$plot_diff_bar(use_number = 1:30, width = 0.8) +write.csv(t4$res_diff, file = "LDA_Bar.csv") + +library(Maaslin2) +# input_data <- as.data.frame(t(read.delim(system.file('extdata','HMP2_taxonomy.tsv', package="Maaslin2"), row.names = 1))) +# input_metadata <- read.delim(system.file('extdata','HMP2_metadata.tsv', package="Maaslin2"), row.names = 1) +# library(microeco) +# d1 <- microtable$new(input_data, sample_table = input_metadata) +meco_object$taxa_abund$Genus <- meco_object$otu_table +t5 <- trans_diff$new(dataset = meco_object, method = "maaslin2", + fixed_effects = c('SubGroup', ''), + random_effects = 'NONE', + normalization = 'NONE', + reference = 'NONE', + standardize = FALSE, + taxa_level = "Genus", filter_thres = 0.001, + tmp_input_maaslin2 = "maaslin2_tmp_input3", tmp_output_maaslin2 = "maaslin2_tmp_output3") +View(t5$res_diff) +# use name column as x axis; delete the Env column. it is a minor bug in v1.5.0. Fixed in the v1.6.0 +t5$res_diff <- t5$res_diff[, -3] +t5$plot_diff_bar(heatmap_cell = "coef", heatmap_lab_fill = "Coef", heatmap_x = "name", heatmap_y = "Taxa", keep_prefix = TRUE, add_sig = FALSE) +write.csv(t5$res_diff, file = "Maaslin2_Heatmap.csv")