目录
过滤细菌库 目录
# Filter bacterial contamination out of the cleaned paired reads of one sample.
#
# Reads from the surrounding script:
#   $1              file-name prefix of the sample (presumably the sample name)
#   $wd             root of the working directory
#   $map_dir        directory under $wd that receives the alignment output
#   $cleandata_dir  directory under $wd that holds the cleaned FASTQ files
#   $mate_str       string placed between "Clean_Data<N>." and "fq" in the
#                   input FASTQ names (may be empty)
#
# Every expansion is quoted so that paths containing whitespace or glob
# characters are passed to the tools as single arguments.
#
# NOTE(review): "referance.fasta" is kept exactly as spelled; it is presumably
# the name of the file on disk -- confirm before renaming.
bac_ref="$wd/Ref/NCBI_bacteria/referance.fasta"
bac_prefix="$wd/$map_dir/${1}_BacLib"
clean_prefix="$wd/$cleandata_dir/${1}_Clean_Data"

# Step 1: bwa aln on each mate against the bacterial reference; the .sai file
# is written via -f, stdout/stderr go to "<output>.log".
for mate in 1 2; do
  bwa aln -t 10 -f "${bac_prefix}_${mate}.sai" "$bac_ref" \
    "${clean_prefix}${mate}.${mate_str}fq" >"${bac_prefix}_${mate}.sai.log" 2>&1
  echo "[filt] Finish searching SA coordinate for ${1}_Clean_Data${mate}.${mate_str}fq"
done

# Step 2: single-end mapping (bwa samse) of each mate, producing one SAM file
# per mate.
for mate in 1 2; do
  bwa samse -f "${bac_prefix}_${mate}.sam" "$bac_ref" "${bac_prefix}_${mate}.sai" \
    "${clean_prefix}${mate}.${mate_str}fq" >"${bac_prefix}_${mate}.sam.log" 2>&1
  echo "[filt] Finish single-end mapping for ${1}_Clean_Data${mate}.${mate_str}fq"
done

# Step 3: drop the SAM header lines (starting with "@") and keep only the
# records whose third column is not "*".
for mate in 1 2; do
  perl -ane 'chomp;next if (/^\@/);if ($F[2] ne "*"){print "$_\n"}' \
    "${bac_prefix}_${mate}.sam" >"${bac_prefix}_${mate}.sam.filt"
done

# Step 4: hand both filtered SAM files, both input FASTQ files and the two
# output FASTQ names to the project script.
# NOTE(review): presumably it writes the reads that did not hit the bacterial
# library to the *.filtBacLib.fq files -- confirm against the script.
perl "$wd/perlscript/sel_seq_for_Hiseq3000.pl" \
  "${bac_prefix}_1.sam.filt" "${bac_prefix}_2.sam.filt" \
  "${clean_prefix}1.${mate_str}fq" "${clean_prefix}2.${mate_str}fq" \
  "${clean_prefix}1.filtBacLib.fq" "${clean_prefix}2.filtBacLib.fq"
echo "[filt] Finish filt bac contaminate"
饱和度分析 目录
第一步:先将 reads 比对(mapping)到 knownGene 或 RefSeq 的序列上,而不是比对到完整的参考基因组。
# Find the SA coordinates of each mate of the bacteria-filtered reads against
# the reference at $wd/$ref_cds and generate the .sai files.
# stdout and stderr of every bwa call are redirected to "<output>.log".
$ bwa aln -t 10 -f $wd/$map_dir/cds_${sample}_1.sai $wd/$ref_cds $wd/$cleandata_dir/${sample}_Clean_Data1.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}_1.sai.log 2>&1
$ echo "[saturation] Finish search SA coordinate for ${sample}_Clean_Data1.filtBacLib.fq"
$ bwa aln -t 10 -f $wd/$map_dir/cds_${sample}_2.sai $wd/$ref_cds $wd/$cleandata_dir/${sample}_Clean_Data2.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}_2.sai.log 2>&1
$ echo "[saturation] Finish search SA coordinate for ${sample}_Clean_Data2.filtBacLib.fq"
# Paired-end mapping: combine both .sai files with both FASTQ files and
# generate the SAM file cds_${sample}.sam.
$ bwa sampe -f $wd/$map_dir/cds_${sample}.sam $wd/$ref_cds $wd/$map_dir/cds_${sample}_1.sai \
$wd/$map_dir/cds_${sample}_2.sai $wd/$cleandata_dir/${sample}_Clean_Data1.filtBacLib.fq $wd/$cleandata_dir/${sample}_Clean_Data2.filtBacLib.fq 1>$wd/$map_dir/cds_${sample}.sam.log 2>&1
$ echo "[saturation] Finish pair-end mapping for $sample"
bwa aln
Usage: bwa aln [options] <prefix> <in.fq>
- `-t`: number of threads
- `-f`: file to write output to instead of stdout
bwa sampe
Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>
- `-f`: SAM file to output results to [stdout]
接下来是饱和度分析,有两种分析策略:
- 按照比例:按 5%–90% 的比例随机抽取 SAM 中的比对记录,统计第 3 列(参考序列名)的去重个数
- 按照测序深度:以 5X 为步长、按目标测序深度随机抽取比对记录,统计第 3 列(参考序列名)的去重个数
# Proportion-based saturation analysis.
#
# For each percentage in the list, keep every non-header line of
# cds_${sample}.sam with probability <percentage>/100 and append the number
# of distinct values in column 3 of the kept lines to the result file
# (one count per line, in the order of the percentages).
#
# Reads from the surrounding script: $wd, $map_dir, $satu_dir, $sample.
# The sampling uses rand(), so the counts differ from run to run.
prop_stat="$wd/$satu_dir/stat/${sample}.txt"

# Start from an empty result file so that a re-run does not append to the
# counts of an earlier run (the depth-based analysis resets its result file
# in the same way).
rm -f -- "$prop_stat"

for i in 5 10 15 20 30 40 50 60 70 80 90; do
  # The perl one-liner reads the current percentage from the environment.
  export i
  perl -ne 'chomp;next if (/^\@/);if (rand()<0.01*$ENV{"i"}){print "$_\n";}' \
    "$wd/$map_dir/cds_${sample}.sam" \
    | cut -f 3 | sort -u | wc -l >>"$prop_stat"
  echo "[saturation] Analyse saturation for $sample: ${i}%"
done
# Depth-based saturation analysis, followed by removal of the intermediate
# alignment files.
#
# Reads from the surrounding script: $wd, $map_dir, $satu_dir, $sample.
# Exports total_reads, depth1_reads and current_depth because the perl
# one-liner in the loop reads them from the environment.
#
# The cleanup is deliberately the LAST step: both the read count and the
# sampling loop read cds_${sample}.sam, so the file must still exist here.
cds_sam="$wd/$map_dir/cds_${sample}.sam"
depth_stat="$wd/$satu_dir/stat/${sample}_depth.txt"

# Number of non-header lines (lines not starting with "@") in the SAM file.
total_reads=$(perl -ne 'next if /^\@/; $n++; END { print $n + 0 }' "$cds_sam")
export total_reads

# Fixed parameters of the depth estimate.
# NOTE(review): presumably the exome size in bases and the read length in
# bases -- confirm that they match the data being analysed.
exome_length=50000000
reads_length=150

# Number of reads that corresponds to 1X depth (integer division).
depth1_reads=$(( exome_length / reads_length ))
export depth1_reads

# Highest whole-number depth that the available reads can reach.
max_depth=$(( total_reads * reads_length / exome_length ))

# Start from an empty result file.
rm -f -- "$depth_stat"

# For 5X, 10X, ... up to max_depth: keep each non-header line with probability
# current_depth * depth1_reads / total_reads and write
# "<depth>\t<number of distinct values in column 3>" to the result file.
for ((i = 5; i <= max_depth; i += 5)); do
  current_depth=$i
  export current_depth
  printf '%s\t' "$current_depth" >>"$depth_stat"
  perl -ne 'chomp;next if (/^\@/);if (rand()<($ENV{"current_depth"}*$ENV{"depth1_reads"}/$ENV{"total_reads"})){print "$_\n";}' \
    "$cds_sam" | cut -f 3 | sort -u | wc -l >>"$depth_stat"
  echo "[saturation] Analyse saturation for $sample: ${current_depth}X"
done

# Remove the intermediate .sai/.sam files only after every analysis that
# reads them has finished.
rm -- "$wd/$map_dir/cds_${sample}"_[12].sa[im]
rm -- "$cds_sam"