Skip to content

Latest commit

 

History

History
148 lines (136 loc) · 9.32 KB

001.RNA-seq quantification (conducted by Mengyao Guo).md

File metadata and controls

148 lines (136 loc) · 9.32 KB

1. Upstream analysis of RNA seq data (conducted by Mengyao Guo)

#poppy_RNAseq
#-----------------------------------------------------------------
#paths to files
#path to task.pbs
/home/myguo/poppy_RNAseq/pipeline_for_poppy/task.pbs
#path to the poppy RNA-seq raw data
/home/DATA/Arabidopsis_poppy/X101SC19041787-Z02_yingsu_data_Release_20191217/RNA/yingsu/
#path to the poppy ATAC-seq raw data
/home/DATA/Arabidopsis_poppy/poppy_ATAC/xy_merge_all_atac_fq_document_200820
#path to the reference files
/home/xfyang/poppy_project/HN1_DCW_YMR_fianl_uploaded_210510/Papaver_somniferum.*

#path to the poppy RNA-seq results
/home/myguo/poppy_RNAseq/result

#running 
pbs+sh
#-----------------------------------------------------------------
#software load
# Install the QC tools from the bioconda channel, one package per conda call.
for tool in fastqc multiqc; do
    conda install -c bioconda "$tool"
done
#-----------------------------------------------------------------
#check downloaded files
#md5sum -c /home/myguo/timetrans/result/2.cleandata/*r/MD5*.txt
#-----------------------------------------------------------------

#-----------------------------------------------------------------
#fastqc
# Run FastQC on the clean reads of every sample directory, then aggregate the
# individual reports with MultiQC.
raw_root=/home/DATA/Arabidopsis_poppy/X101SC19041787-Z02_yingsu_data_Release_20191217/RNA/yingsu
fastqc_out=/home/myguo/poppy_RNAseq/result/fastqc
multiqc_out=/home/myguo/poppy_RNAseq/result/multiqc
# FastQC does not create its -o directory, so make sure it exists up front.
mkdir -p "$fastqc_out" "$multiqc_out"
for dir in "$raw_root"/*; do
    # Only process entries that really contain a clean/ sub-directory; the
    # previous "cd $dir; cd clean" chain would run fastqc in the wrong
    # directory whenever one of the cd calls failed.
    [ -d "$dir/clean" ] || continue
    fastqc "$dir"/clean/*.gz -t 28 -o "$fastqc_out"
done
echo "OK"
#multiqc
multiqc "$fastqc_out" -o "$multiqc_out"
#-----------------------------------------------------------------
#bowtie-cufflinks
#-----------------------------------------------------------------
#bowtie2-build index
ref_fa=/home/xfyang/poppy_project/HN1_DCW_YMR_fianl_uploaded_210510/Papaver_somniferum.ref.fa
bt2_index=/home/myguo/poppy_RNAseq/ref/Papaver_somniferum_ref_index
raw_root=/home/DATA/Arabidopsis_poppy/X101SC19041787-Z02_yingsu_data_Release_20191217/RNA/yingsu
result_root=/home/myguo/poppy_RNAseq/result
# Create the index and alignment directories so a first run (or a rerun)
# does not fail on a missing or already-existing directory.
mkdir -p "$(dirname "$bt2_index")" "$result_root/align"
bowtie2-build "$ref_fa" "$bt2_index"
#bowtie2 alignment
#for dir in /home/DATA/Arabidopsis_poppy/X101SC19041787-Z02_yingsu_data_Release_20191217/RNA/yingsu/* ;do echo $dir;done
# One iteration per sample directory: align, keep MAPQ >= 10, sort, index,
# and append the alignment / flagstat summaries to the shared report files.
for sample_dir in "$raw_root"/*/; do
    [ -d "$sample_dir" ] || continue
    dir=$(basename "$sample_dir")
    echo "$dir"
    # Resolve the mate files explicitly so an unexpected number of matches is
    # reported instead of being passed to bowtie2 as stray arguments.
    r1=( "$sample_dir"clean/*1_1_clean* )
    r2=( "$sample_dir"clean/*1_2_clean* )
    if [ "${#r1[@]}" -ne 1 ] || [ "${#r2[@]}" -ne 1 ] || [ ! -e "${r1[0]}" ] || [ ! -e "${r2[0]}" ]; then
        echo "skipping $dir: expected exactly one *1_1_clean* and one *1_2_clean* file" >&2
        continue
    fi
    out_dir=$result_root/align/$dir
    mkdir -p "$out_dir"
    bowtie2 -p 28 -x "$bt2_index" -1 "${r1[0]}" -2 "${r2[0]}" -S "$out_dir/$dir.sam" &>> "$result_root/bowtie2alignsum.txt"
    echo "#-----------------------------------" >> "$result_root/bowtie2alignsum.txt"
    samtools view -@ 28 -bS -q 10 "$out_dir/$dir.sam" > "$out_dir/$dir.bam"
    samtools sort -@ 28 "$out_dir/$dir.bam" -o "$out_dir/sorted_$dir.bam"
    samtools index -@ 28 "$out_dir/sorted_$dir.bam" "$out_dir/sorted_$dir.bam.index"
    # The unsorted intermediates are no longer needed once the sorted BAM exists.
    rm "$out_dir/$dir.sam"
    rm "$out_dir/$dir.bam"
    # Name the sorted BAM explicitly rather than globbing *.bam, so leftover
    # files from earlier runs cannot end up on the flagstat command line.
    samtools flagstat -@ 28 "$out_dir/sorted_$dir.bam" >> "$result_root/flagstat.txt"
    echo "#-----------------------------------" >> "$result_root/flagstat.txt"
    #bam_stat.py -i /home/myguo/timetrans/result/2.cleandata/$dir/sorted_$dir.bam > /home/myguo/timetrans/result/2.cleandata/$dir/bam_stat_$dir.log
done
#------------------------------------------------------------------
#cufflinks-cuffmerge-cuffdiff

source activate python27
#gff3 to gtf
gff3=/home/xfyang/poppy_project/HN1_DCW_YMR_fianl_uploaded_210510/Papaver_somniferum.gene.gff3
gtf=/home/myguo/poppy_RNAseq/ref/ref/Papaver_somniferum.gene.gtf
align_root=/home/myguo/poppy_RNAseq/result/align
# The GTF lives in ref/ref, which no earlier step creates.
mkdir -p "$(dirname "$gtf")"
gffread "$gff3" -T -o "$gtf"
#cufflinks
# Quantify each sample against the reference annotation (-G); results go to
# <sample>/cufflinksG/cufflink_<sample>.
for sample_dir in "$align_root"/*/; do
    [ -d "$sample_dir" ] || continue
    dir=$(basename "$sample_dir")
    echo "$dir"
    # -p keeps reruns from failing on an already-existing directory.
    mkdir -p "$align_root/$dir/cufflinksG"
    cufflinks -o "$align_root/$dir/cufflinksG/cufflink_$dir" -p 28 -G "$gtf" "$align_root/$dir"/*.bam
done
echo "OK"

# List every cufflinks output file per sample, then keep only the
# transcripts.gtf paths as the assembly list intended for cuffmerge.
align_root=/home/myguo/poppy_RNAseq/result/align
all_list=/home/myguo/poppy_RNAseq/result/assembly_for_cuff_G.txt
gtf_list=/home/myguo/poppy_RNAseq/result/assembly_trans_cuff_G.txt
# Start from an empty list: the loop appends, so without this a rerun would
# duplicate every entry.
: > "$all_list"
for sample_dir in "$align_root"/*/; do
    [ -d "$sample_dir" ] || continue
    dir=$(basename "$sample_dir")
    for file in "$align_root/$dir/cufflinksG/cufflink_$dir"/*; do
        # Skip the literal pattern that the shell leaves behind when nothing matches.
        [ -e "$file" ] || continue
        echo "$file" >> "$all_list"
    done
done
echo "OK"

# Select transcripts.gtf by name. The previous NR%4==0 filter picked the same
# file only while every directory held exactly four outputs in glob order.
grep '/transcripts\.gtf$' "$all_list" > "$gtf_list"
#cuffmerge -o /home/myguo/poppy_RNAseq/result/merged_asm_G -g /home/myguo/poppy_RNAseq/ref/ref/Papaver_somniferum.gene.gtf -s /home/xfyang/poppy_project/HN1_DCW_YMR_fianl_uploaded_210510/Papaver_somniferum.ref.fa -p 28 /home/myguo/poppy_RNAseq/result/assembly_trans_cuff_G.txt
echo "OK"
#cuffdiff
#awk 'NR%4'==3 /home/myguo/poppy_RNAseq/result/sorted_bam_full_pathway.txt > /home/myguo/poppy_RNAseq/result/sorted_bam_pathway.txt.txt
# Differential expression across the six tissues. Each positional argument
# after the GTF is one condition (comma-separated replicate BAMs), given in
# the same order as --labels.
align=/home/myguo/poppy_RNAseq/result/align
cuffdiff \
    -o /home/myguo/poppy_RNAseq/result/diff_tissue_A_out \
    --labels Capsule,Petal,Stem,Fineroot,Leaf,Taproot \
    --library-norm-method classic-fpkm \
    -b /home/xfyang/poppy_project/HN1_DCW_YMR_fianl_uploaded_210510/Papaver_somniferum.ref.fa \
    -p 27 \
    -u \
    /home/myguo/poppy_RNAseq/ref/ref/Papaver_somniferum.gene.gtf \
    "$align/guo/sorted_guo_rep1.bam,$align/guo2/sorted_guo_rep2.bam,$align/guo3/sorted_guo_rep3.bam" \
    "$align/hua/sorted_hua_rep1.bam,$align/hua2/sorted_hua_rep2.bam,$align/hua3/sorted_hua_rep3.bam" \
    "$align/jing/sorted_jing_rep1.bam,$align/jing2/sorted_jing_rep2.bam,$align/jing3/sorted_jing_rep3.bam" \
    "$align/xugen1/sorted_xugen_rep1.bam,$align/xugen2/sorted_xugen_rep2.bam,$align/xugen3/sorted_xugen_rep3.bam" \
    "$align/ye/sorted_ye_rep1.bam,$align/ye2/sorted_ye_rep2.bam,$align/ye3/sorted_ye_rep3.bam" \
    "$align/zhugen1/sorted_zhugen_rep1.bam,$align/zhugen2/sorted_zhugen_rep2.bam,$align/zhugen3/sorted_zhugen_rep3.bam"
#---------------------------------------------------------------------
#---------------------------------------------------------------------
#FPKM to TPM
#---------------------------------------------------------------------

# Read the per-replicate gene table and reshape it into two gene x replicate
# tables: externally scaled fragment counts and FPKM values.
external_scaled_frags <- read.table(
  "genes.read_group_tracking",
  header = TRUE,
  sep = "\t"
)
sample <- c("Capsule", "Petal", "Stem", "Fineroot", "Leaf", "Taproot")
sample_length <- seq_along(sample)
replicate <- c("0", "1", "2")

# One three-column table (tracking_id, external_scaled_frags, FPKM) per
# condition/replicate pair, stored in the order Capsule0, Capsule1, ...
per_replicate <- list()
for (i in sample_length) {
  for (j in seq_along(replicate)) {
    key <- paste0(sample[i], replicate[j])
    # Filter with explicit column access. Inside subset() the name
    # `replicate` resolved to the table's own column on both sides of `==`,
    # so the comparison used the j-th row's value rather than replicate[j]
    # and only selected the right rows by accident of row order.
    keep <- which(
      external_scaled_frags[["condition"]] == sample[i] &
        external_scaled_frags[["replicate"]] == replicate[j]
    )
    single_sample <- external_scaled_frags[
      keep,
      c("tracking_id", "external_scaled_frags", "FPKM")
    ]
    names(single_sample) <- c(
      paste(sample[i], replicate[j], "tracking_id", sep = "_"),
      key,
      key
    )
    # Kept so the per-replicate tables stay available under their old names.
    assign(key, single_sample)
    per_replicate[[key]] <- single_sample
  }
}

# The tables are glued together by position, so every one of them must list
# the same genes in the same order.
gene_ids <- as.character(per_replicate[[1]][[1]])
same_genes <- vapply(
  per_replicate,
  function(tbl) identical(as.character(tbl[[1]]), gene_ids),
  logical(1)
)
if (!all(same_genes)) {
  stop(
    "tracking_id order differs between replicates: ",
    paste(names(per_replicate)[!same_genes], collapse = ", "),
    call. = FALSE
  )
}

# unname() stops cbind from prefixing the column names with the list names.
select_gene_read_group_tracking <- do.call(cbind, unname(per_replicate))

# Columns repeat as (tracking_id, frags, FPKM); take every third column
# starting at 2 for the fragment counts and at 3 for FPKM.
frag_cols <- seq(2, ncol(select_gene_read_group_tracking), 3)
fpkm_cols <- seq(3, ncol(select_gene_read_group_tracking), 3)
select_external_frags <- select_gene_read_group_tracking[, frag_cols]
select_external_frags_fpkm <- select_gene_read_group_tracking[, fpkm_cols]
# Use the tracking IDs as row names
rownames(select_external_frags) <- gene_ids
rownames(select_external_frags_fpkm) <- gene_ids
#' Convert a vector of FPKM values to TPM
#'
#' Each value is divided by the vector total and scaled to one million; the
#' arithmetic is carried out on the log scale.
#'
#' @param fpkm Numeric vector of FPKM values for one sample.
#' @return Numeric vector of the same length as `fpkm`.
fpkmToTpm <- function(fpkm) {
  log_total <- log(sum(fpkm))
  log_share <- log(fpkm) - log_total
  exp(log_share + log(1e6))
}
# Convert every replicate column from FPKM to TPM; the result is a numeric
# matrix with genes in rows.
tpms <- apply(as.matrix(select_external_frags_fpkm), 2, fpkmToTpm)
# Sanity check: each column should add up to 1e6.
colSums(tpms)
# Final column labels for the three exported tables.
# NOTE(review): the labels run 1, 0, 2 within each tissue while the columns
# were extracted in the order 0, 1, 2, so replicates 0 and 1 swap names.
# Kept as written -- confirm whether this relabelling is intentional.
nn <- c(
  "Capsule1", "Capsule0", "Capsule2",
  "Petal1", "Petal0", "Petal2",
  "Stem1", "Stem0", "Stem2",
  "Fineroot1", "Fineroot0", "Fineroot2",
  "Leaf1", "Leaf0", "Leaf2",
  "Taproot1", "Taproot0", "Taproot2"
)
# (A console echo of `nn` that had been pasted here was removed: it is not
# valid R and stopped the script from being sourced.)
colnames(select_external_frags) <- nn
colnames(select_external_frags_fpkm) <- nn
colnames(tpms) <- nn
write.csv(select_external_frags, file = "select_external_frags.csv")
write.csv(select_external_frags_fpkm, file = "select_external_frags_fpkm.csv")
write.csv(tpms, file = "select_external_frags_tpm.csv")