Skip to content

Latest commit

 

History

History
110 lines (92 loc) · 5.31 KB

RNA-seq分析番外篇.md

File metadata and controls

110 lines (92 loc) · 5.31 KB

目录

RNA-seq分析番外篇

RNA-seq分析番外篇

过滤细菌库 目录

与细菌库比对(进行单端mapping)

bwa aln -t 10 -f $wd/$map_dir/${1}_BacLib_1.sai $wd/Ref/NCBI_bacteria/referance.fasta \
	$wd/$cleandata_dir/${1}_Clean_Data1.${mate_str}fq 1>$wd/$map_dir/${1}_BacLib_1.sai.log 2>&1
echo "[filt] Finish searching SA coordinate for ${1}_Clean_Data1.${mate_str}fq"
bwa aln -t 10 -f $wd/$map_dir/${1}_BacLib_2.sai $wd/Ref/NCBI_bacteria/referance.fasta \
	$wd/$cleandata_dir/${1}_Clean_Data2.${mate_str}fq 1>$wd/$map_dir/${1}_BacLib_2.sai.log 2>&1
echo "[filt] Finish searching SA coordinate for ${1}_Clean_Data2.${mate_str}fq"
bwa samse -f $wd/$map_dir/${1}_BacLib_1.sam $wd/Ref/NCBI_bacteria/referance.fasta $wd/$map_dir/${1}_BacLib_1.sai \
	$wd/$cleandata_dir/${1}_Clean_Data1.${mate_str}fq 1>$wd/$map_dir/${1}_BacLib_1.sam.log 2>&1
echo "[filt] Finish single-end mapping for ${1}_Clean_Data1.${mate_str}fq"
bwa samse -f $wd/$map_dir/${1}_BacLib_2.sam $wd/Ref/NCBI_bacteria/referance.fasta $wd/$map_dir/${1}_BacLib_2.sai \
	$wd/$cleandata_dir/${1}_Clean_Data2.${mate_str}fq 1>$wd/$map_dir/${1}_BacLib_2.sam.log 2>&1
echo "[filt] Finish single-end mapping for ${1}_Clean_Data2.${mate_str}fq"

过滤出mapping上细菌库的序列

perl -ane 'chomp;next if (/^\@/);if ($F[2] ne "*"){print "$_\n"}' $wd/$map_dir/${1}_BacLib_1.sam >$wd/$map_dir/${1}_BacLib_1.sam.filt
perl -ane 'chomp;next if (/^\@/);if ($F[2] ne "*"){print "$_\n"}' $wd/$map_dir/${1}_BacLib_2.sam >$wd/$map_dir/${1}_BacLib_2.sam.filt

过滤掉mapping上细菌库的序列

perl $wd/perlscript/sel_seq_for_Hiseq3000.pl $wd/$map_dir/${1}_BacLib_1.sam.filt $wd/$map_dir/${1}_BacLib_2.sam.filt \
	$wd/$cleandata_dir/${1}_Clean_Data1.${mate_str}fq $wd/$cleandata_dir/${1}_Clean_Data2.${mate_str}fq \
	$wd/$cleandata_dir/${1}_Clean_Data1.filtBacLib.fq $wd/$cleandata_dir/${1}_Clean_Data2.filtBacLib.fq 
echo "[filt] Finish filt bac contaminate"

饱和度分析 目录

第一步,先要将reads mapping 到 knownGene或是RefSeq上,而不是完整的参考基因组

# 寻找 SA coordinates,生成sai文件
$ bwa aln -t 10 -f $wd/$map_dir/cds_${sample}_1.sai $wd/$ref_cds $wd/$cleandata_dir/${sample}_Clean_Data1.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}_1.sai.log 2>&1
$ echo "[saturation] Finish search SA coordinate for ${sample}_Clean_Data1.filtBacLib.fq"
$ bwa aln -t 10 -f $wd/$map_dir/cds_${sample}_2.sai $wd/$ref_cds $wd/$cleandata_dir/${sample}_Clean_Data2.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}_2.sai.log 2>&1
$ echo "[saturation] Finish search SA coordinate for ${sample}_Clean_Data2.filtBacLib.fq"

# 双端mapping,生成sam文件
$ bwa sampe -f $wd/$map_dir/cds_${sample}.sam $wd/$ref_cds $wd/$map_dir/cds_${sample}_1.sai \
	$wd/$map_dir/cds_${sample}_2.sai $wd/$cleandata_dir/${sample}_Clean_Data1.filtBacLib.fq $wd/$cleandata_dir/${sample}_Clean_Data2.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}.sam.log 2>&1
$ echo "[saturation] Finish pair-end mapping for $sample"

bwa aln Usage: bwa aln [options] <prefix> <in.fq>

  • -t Number of threads
  • -f File to write output to instead of stdout

bwa sampe Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>

  • -f SAM file to output results to [stdout]

接下来是饱和度分析,有两种分析策略:

  • 按照比例
  • 按照测序深度

按照比例

for i in 5 10 15 20 30 40 50 60 70 80 90
do
	export i
	perl -ne 'chomp;next if (/^\@/);if (rand()<0.01*$ENV{"i"}){print "$_\n";}' $wd/$map_dir/cds_${sample}.sam | cut -f 3 | sort | uniq | wc -l >>$wd/$satu_dir/stat/${sample}.txt
	echo "[saturation] Analyse saturation for $sample: ${i}%"
done 
rm $wd/$map_dir/cds_${sample}_[12].sa[im]
rm $wd/$map_dir/cds_${sample}.sam

按照测序深度(一个外显子组大小为50M)

total_reads=`perl -ne 'chomp;next if (/^\@/);print "$_\n"' $wd/$map_dir/cds_${sample}.sam | wc -l | perl -ane 'chomp;print "$F[0]"'`
export total_reads
exome_length=50000000
reads_length=150
depth1_reads=$[$exome_length/$reads_length]
export depth1_reads
max_depth=$[$total_reads*$reads_length/$exome_length]
if [ -f $wd/$satu_dir/stat/${sample}_depth.txt ]
then
	rm $wd/$satu_dir/stat/${sample}_depth.txt
fi
for ((i=5;i<=max_depth;i+=5))
do
	current_depth=$i
	export current_depth
	echo -ne "$current_depth\t" >>$wd/$satu_dir/stat/${sample}_depth.txt
	perl -ne 'chomp;next if (/^\@/);if (rand()<($ENV{"current_depth"}*$ENV{"depth1_reads"}/$ENV{"total_reads"})){print "$_\n";}' $wd/$map_dir/cds_${sample}.sam | cut -f 3 | sort | uniq | wc -l >>$wd/$satu_dir/stat/${sample}_depth.txt
	echo "[saturation] Analyse saturation for $sample: ${current_depth}X"
done