Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
QinZhen1995 authored May 7, 2023
1 parent 53537e5 commit d6c0be0
Show file tree
Hide file tree
Showing 13 changed files with 299,729 additions and 0 deletions.
144 changes: 144 additions & 0 deletions 01.RNA-seq/DEseqForKallisto.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/Rscript
suppressPackageStartupMessages({
library("data.table")
library(DESeq2)
library(ggplot2)
library(stringr)
library(dplyr)
library(ggpubr)
library(ggthemes)
library(tximport)
library(ggrepel)
library("tibble")
library("optparse")
library("BiocParallel")

})

# Command-line interface (optparse).
#
# Every option is stored in the parsed list under its `dest` name:
#   Rinput       directory passed with -i
#   namefile     sample design table passed with -n
#   t2gfile      transcript-to-gene table passed with -c
#   contrastfile contrast table passed with -t
#   p            padj cutoff, kept as a string so that "" means "not given"
#   f            log2 fold-change cutoff (string, converted by the caller)
#   x            number of worker threads (integer)
#   outfile      output prefix passed with -o
#
# NOTE: the long flag for -n is spelled `--namefle` (sic). It is kept as is so
# existing command lines keep working.
option_list <- list(
  make_option(c("-i", "--inputfile"), dest = "Rinput", default = "",
              help = "[opt] inputdir"),
  make_option(c("-n", "--namefle"), dest = "namefile", default = "",
              help = "[opt] namefile"),
  make_option(c("-c", "--t2g"), dest = "t2gfile", default = "",
              help = "[opt] t2g config file"),
  make_option(c("-t", "--contrast"), dest = "contrastfile", default = "",
              help = "[opt] contrast config file"),
  make_option(c("-p", "--padj"), dest = "p", default = "",
              help = "[opt] padj cutoff "),
  make_option(c("-f", "--foldchange"), dest = "f", default = "0",
              help = "[opt] log2foldchange default 0"),
  # Typed as integer so a non-numeric thread count is caught at parse time
  # instead of being handed to the parallel backend as a string.
  make_option(c("-x", "--threads"), dest = "x", type = "integer",
              default = 4L,
              help = "[opt] threads default 4"),
  make_option(c("-o", "--outprefix"), dest = "outfile", default = "",
              help = "[opt] DEseq2 out file")
)

# The description text is a single string literal; its continuation lines must
# stay unindented so the help output is unchanged.
parser <- OptionParser(usage = "%prog [options] file",
                       option_list = option_list, description = " 2019-3-24 \
Description: \
Wrapper for DEseq2.\
Example: \
DEseqForKallisto.R -i 01.kallisto -n design.conf -c t2g -t contrast.conf -x 1 -o out "
)
#



# Parse the command line. `arguments` is a list keyed by each option's `dest`.
arguments <- parse_args(parser)
test <- arguments$Rinput

# Validate the input directory before anything reads from it.
if (test == "") {
  print_help(parser)
  stop("input file is not specified", call. = FALSE)
}
if (!dir.exists(test)) {
  stop(sprintf("input directory does not exist: %s", test), call. = FALSE)
}

# Every entry of the input directory is treated as one sample directory that
# holds an `abundance.h5` file; the entry name becomes the sample name.
sample_dirs <- list.files(test)
files <- file.path(test, sample_dirs, "abundance.h5")
names(files) <- sample_dirs

# Register the multicore backend used by the `parallel = TRUE` calls later on.
# as.integer() guards against the thread count arriving as a string.
register(MulticoreParam(as.integer(arguments$x)))

# Sample design table (whitespace separated, with a header row). Later code
# in this script reads its `sample` column and uses `condition` in the design
# formula.
designfile <- read.table(arguments$namefile, header = TRUE,
                         stringsAsFactors = FALSE)

# Transcript-to-gene mapping table (with a header row), handed to tximport.
t2gfile <- arguments$t2gfile
tx2gene <- read.table(file = t2gfile, header = TRUE)

# Contrast table: tab separated, no header, one contrast per row.
contrastfile <- read.delim(arguments$contrastfile, header = FALSE,
                           stringsAsFactors = FALSE)

# Numeric cutoffs and the output prefix.
# P is NA when no padj cutoff was supplied (the option defaults to "").
# NOTE: `F` holds the log2 fold-change cutoff and therefore masks the base
# shorthand for FALSE from this point on; always spell out FALSE below.
P <- as.numeric(arguments$p)
F <- as.numeric(arguments$f)
o <- arguments$outfile



# Import kallisto abundances and summarise them to gene level via tx2gene.
txi <- tximport(files, type = "kallisto", tx2gene = tx2gene,
                txIn = TRUE, txOut = FALSE, countsFromAbundance = "no")

# Compare the sample names found on disk with the ones in the design table.
# Samples are paired with design rows BY POSITION, so a differing row count
# can never be valid.
quant_samples <- colnames(txi$abundance)
design_samples <- as.character(designfile$sample)

if (length(quant_samples) != length(design_samples)) {
  stop(sprintf("found %d quantified samples but %d rows in the design file",
               length(quant_samples), length(design_samples)),
       call. = FALSE)
}

# all() yields a single TRUE/FALSE even when only some names match.
if (all(quant_samples == design_samples)) {
  print("all samples matched")
} else {
  # Names differ: report both vectors explicitly so the positional pairing
  # can be checked by eye.
  print("rename file from :")
  print(quant_samples)
  print("to :")
  print(design_samples)
}

# Build the DESeq2 data set with `condition` as the only design factor and
# fit the model using the registered parallel backend.
dds <- DESeqDataSetFromTximport(txi, colData = designfile,
                                design = ~ condition)

dds2 <- DESeq(dds, parallel = TRUE)

# Accumulator for the cross-contrast summary: one row per gene, two columns
# (log2FoldChange, padj) added per contrast.
mergetable <- data.frame(row.names = row.names(dds2))

# Normalized counts do not depend on the contrast, so compute them once.
norm_counts <- as.data.frame(counts(dds2, normalized = TRUE))

# padj cutoff used for the volcano plot (colouring and dashed line). Falls
# back to 0.05 when no cutoff was given, so the line is always drawn and
# always agrees with the colouring.
volcano_padj <- if (is.na(P)) 0.05 else P

# Fixed colour per group; only the colours of groups actually present are
# passed to the plot so a missing group cannot shift the colour assignment.
group_colors <- c("Down-regulated" = "#2f5688",
                  "Not-changed"    = "#BBBBBB",
                  "Up-regulated"   = "#CC0000")

for (i in seq_len(nrow(contrastfile))) {
  # One contrast row is used as: <factor> <numerator> <denominator>.
  contrast_vector <- as.character(unlist(contrastfile[i, ]))
  contrast_name <- paste0(contrast_vector[2:3], collapse = "vs")

  res <- results(dds2, contrast = contrast_vector, parallel = TRUE)
  # type = "normal" is stated explicitly: it is the shrinkage estimator that
  # accepts `contrast`, and current DESeq2 no longer uses it by default.
  res <- lfcShrink(dds2, contrast = contrast_vector, res = res,
                   type = "normal", parallel = TRUE)

  # Genes reported for this contrast: |log2FC| above the cutoff and, when a
  # padj cutoff was supplied, padj below it. Rows where the test is NA are
  # dropped by which().
  keep <- abs(res$log2FoldChange) > F
  if (arguments$p != "") {
    keep <- keep & res$padj < P
  }
  diff <- as.data.frame(res[which(keep), ])

  # Volcano plot data: all genes, ordered by padj so that head() below picks
  # the most significant ones.
  diff2 <- as.data.frame(res)
  diff2$logQ <- -log10(diff2$padj)
  diff2$Group <- "Not-changed"
  diff2$Label <- ""
  diff2 <- diff2[order(diff2$padj), ]
  significant <- diff2$padj < volcano_padj
  diff2$Group[which(significant & diff2$log2FoldChange > F)] <- "Up-regulated"
  diff2$Group[which(significant & diff2$log2FoldChange < -F)] <- "Down-regulated"

  # Label the 10 most significant genes in each direction.
  upgenes <- head(rownames(diff2)[diff2$Group == "Up-regulated"], 10)
  downgenes <- head(rownames(diff2)[diff2$Group == "Down-regulated"], 10)
  top10genes <- c(upgenes, downgenes)
  diff2$Label[match(top10genes, rownames(diff2))] <- top10genes

  present_colors <- unname(group_colors[sort(unique(diff2$Group))])
  Vplot <- ggscatter(diff2, x = "log2FoldChange", y = "logQ",
                     color = "Group", palette = present_colors, size = 1,
                     label = diff2$Label, font.label = 8, repel = TRUE,
                     xlab = "Log2FC", ylab = "-Log10(padj)") +
    theme_base() +
    geom_hline(yintercept = -log10(volcano_padj), linetype = "dashed") +
    geom_vline(xintercept = c(-F, F), linetype = "dashed")
  ggsave(paste0("./", contrast_name, "vp.pdf"), plot = Vplot)

  # Prefix the result columns with the contrast name and add the fold change
  # and padj columns (selected by name, not position) to the summary table.
  colnames(diff) <- paste(contrast_name, colnames(diff), sep = "_")
  summary_cols <- paste(contrast_name, c("log2FoldChange", "padj"), sep = "_")
  mergetable <- merge(mergetable, diff[, summary_cols, drop = FALSE],
                      by = "row.names", sort = FALSE, all = TRUE)
  rownames(mergetable) <- mergetable$Row.names
  mergetable$Row.names <- NULL

  # Per-contrast output: the selected genes plus their normalized counts.
  final <- merge(diff, norm_counts, by = "row.names", sort = FALSE)
  outfile <- paste(o, contrast_name, sep = ".")
  write.table(final, file = outfile, row.names = FALSE, sep = "\t",
              quote = FALSE)
}


# Append the normalized counts of every sample to the cross-contrast summary
# and write it to "<prefix>all". The argument is spelled out as `normalized`
# rather than relying on partial matching of `normalize`.
mergetable <- merge(mergetable, counts(dds2, normalized = TRUE),
                    by = "row.names", sort = FALSE, all = TRUE)

# merge() puts the gene identifiers into a `Row.names` column, so the row
# names themselves are not written.
write.table(mergetable, file = paste0(o, "all"), row.names = FALSE,
            sep = "\t", quote = FALSE)

3 changes: 3 additions & 0 deletions 01.RNA-seq/contrast.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
condition KO_0h WT_0h
condition KO_3h WT_3h
condition KO_6h WT_6h
19 changes: 19 additions & 0 deletions 01.RNA-seq/design.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
sample condition reps
KO-0h-rep1 KO_0h 1
KO-0h-rep2 KO_0h 2
KO-0h-rep3 KO_0h 3
KO-3h-rep1 KO_3h 1
KO-3h-rep2 KO_3h 2
KO-3h-rep3 KO_3h 3
KO-6h-rep1 KO_6h 1
KO-6h-rep2 KO_6h 2
KO-6h-rep3 KO_6h 3
WT-0h-rep1 WT_0h 1
WT-0h-rep2 WT_0h 2
WT-0h-rep3 WT_0h 3
WT-3h-rep1 WT_3h 1
WT-3h-rep2 WT_3h 2
WT-3h-rep3 WT_3h 3
WT-6h-rep1 WT_6h 1
WT-6h-rep2 WT_6h 2
WT-6h-rep3 WT_6h 3
38 changes: 38 additions & 0 deletions 01.RNA-seq/kallisto_strand.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
# Trim paired-end reads with fastp, then quantify them with kallisto.
# One background job is started per sample; the script waits for all of them.
#
# Usage: kallisto_strand.sh "<R1>|<R2> [<R1>|<R2> ...]"
#   $1  whitespace-separated list of "R1|R2" fastq file-name pairs.
#       The output prefix of a sample is the part of R1 before the first "_".
#
# Exit status: 0 when every sample job succeeded, 1 otherwise.
#
# NOTE(review): `mydate` is not a standard command; presumably a user-defined
# helper available on PATH -- confirm.

pwd; hostname; mydate

if [ -z "${1}" ]; then
    echo "usage: $0 \"R1.fastq.gz|R2.fastq.gz [R1.fastq.gz|R2.fastq.gz ...]\"" >&2
    exit 1
fi

mkdir -p 00.clean
mkdir -p 01.kallisto

pids=()

# ${1} is intentionally unquoted: word splitting yields one "R1|R2" pair per
# iteration.
for pair in ${1}
do
    (
        case "${pair}" in
            *"|"*) ;;
            *)
                echo "malformed entry (expected R1|R2): ${pair}" >&2
                exit 1
                ;;
        esac

        # Field 1 and field 2 of the "|"-separated pair.
        R1=${pair%%|*}
        rest=${pair#*|}
        R2=${rest%%|*}
        # Sample name: everything in R1 before the first underscore.
        name=${R1%%_*}

        echo "R1 fastq file:" "${R1}"
        echo "R2 fastq file:" "${R2}"
        echo " out prefix:" "${name}"
        echo "------fastp START-----"
        # Do not quantify a sample whose trimming step failed.
        if ! fastp -i "./${R1}" -I "./${R2}" \
            -o "./00.clean/${name}_R1_clean.fq.gz" \
            -O "./00.clean/${name}_R2_clean.fq.gz" \
            -3 -5 -W 6 -l 30 -c -h "./00.clean/${name}.html" -z 6 -w 10
        then
            echo "fastp failed for ${name}" >&2
            exit 1
        fi
        echo $(mydate) "------fastp END-----"
        echo "------quant START-----"
        kallisto quant \
            -i ~/genome/index/IWGSC_v1_part/IWGSC_part_v1.1_HC_LC_kallisto \
            -o "./01.kallisto/${name}" \
            --bias -t 1 \
            -b 2 \
            --single-overhang \
            --rf-stranded \
            --genomebam \
            -g ~/genome/index/IWGSC_v1_part/IWGSC_part_v1.1_HC_LC.gtf \
            -c ~/genome/index/IWGSC_v1_part/chrlength.tab \
            "./00.clean/${name}_R1_clean.fq.gz" "./00.clean/${name}_R2_clean.fq.gz"
    ) &
    pids+=("$!")
done

# Wait for each job individually so failures can be counted.
failed=0
for pid in "${pids[@]}"
do
    wait "${pid}" || failed=$((failed + 1))
done

mydate
echo '------ALL FINISH-----'

if [ "${failed}" -gt 0 ]; then
    echo "${failed} sample job(s) failed" >&2
    exit 1
fi
9 changes: 9 additions & 0 deletions 01.RNA-seq/mapping.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
WT-0h-rep1_R1.fastq.gz|WT-0h-rep1_R2.fastq.gz
WT-0h-rep2_R1.fastq.gz|WT-0h-rep2_R2.fastq.gz
WT-0h-rep3_R1.fastq.gz|WT-0h-rep3_R2.fastq.gz
WT-3h-rep1_R1.fastq.gz|WT-3h-rep1_R2.fastq.gz
WT-3h-rep2_R1.fastq.gz|WT-3h-rep2_R2.fastq.gz
WT-3h-rep3_R1.fastq.gz|WT-3h-rep3_R2.fastq.gz
WT-6h-rep1_R1.fastq.gz|WT-6h-rep1_R2.fastq.gz
WT-6h-rep2_R1.fastq.gz|WT-6h-rep2_R2.fastq.gz
WT-6h-rep3_R1.fastq.gz|WT-6h-rep3_R2.fastq.gz
Loading

0 comments on commit d6c0be0

Please sign in to comment.