-
Notifications
You must be signed in to change notification settings - Fork 0
/
mapping_qc.txt
43 lines (33 loc) · 1.75 KB
/
mapping_qc.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#### Quality checks and preprocessing of SAM/BAM files ###
## Follows the GATK best practices
# Run the container for GATK-4.
# Pull the current GATK-4 image, then start an interactive session with the
# current working directory bind-mounted at /data/ inside the container.
# "$PWD" is quoted so a working directory containing spaces still yields a
# single, valid -v argument.
# NOTE(review): presumably the gatk commands further down are typed inside this
# interactive session — confirm.
docker pull broadinstitute/gatk:latest
docker run -it -v "$PWD":/data/ broadinstitute/gatk:latest
# Build a combined BAM (handy for multi-sample comparison).
# Prerequisite: the tumor and normal sample BAM files (tumor.bam, normal.bam)
# have already been downloaded into the working directory.
# -r attaches an RG tag to each alignment (per the samtools merge manual the
# tag value is inferred from the input file names).
samtools merge \
  -r \
  -o merged.bam \
  normal.bam \
  tumor.bam
# Create the index for the merged BAM.
samtools index merged.bam
# Mark duplicates.
# Uses the GATK4 argument style (-I/-O/-M) documented for the bundled Picard
# tools, instead of the legacy Picard KEY=value form.
# Inputs/outputs:
#   -I  BAM to scan for duplicates
#   -O  BAM with duplicates flagged
#   -M  duplication metrics report
gatk MarkDuplicates \
  -I mapping_files/merged_sorted.bam \
  -O mapping_files/marked_duplicates.bam \
  -M mapping_files/marked_dup_metrics.txt
# Sort the duplicate-marked records by coordinate.
# The sort order is passed with the long option --SORT_ORDER (short form: -SO).
# NOTE(review): the output path is the same file MarkDuplicates read above, so
# this step overwrites mapping_files/merged_sorted.bam — confirm that is intended.
gatk SortSam \
  -I mapping_files/marked_duplicates.bam \
  -O mapping_files/merged_sorted.bam \
  --SORT_ORDER coordinate
# BQSR (base quality score recalibration)

# Known-sites resource (dbSNP138, hg38).
# The path is kept in one variable so the download, the feature index and
# BaseRecalibrator all refer to the same file; previously the VCF was fetched
# and indexed in the working directory but read from read_mapping/.
dbsnp_vcf=read_mapping/Homo_sapiens_assembly38.dbsnp138.vcf
# -P saves the download under the VCF's directory (created if missing).
wget -P "${dbsnp_vcf%/*}" https://storage.googleapis.com/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf
gatk IndexFeatureFile --input "$dbsnp_vcf"

# Create the reference genome index using samtools.
samtools faidx ref_genome/chr1.fa
# NOTE(review): this full-assembly .dict is downloaded into the working
# directory but no later command in this file references it; the dictionary
# for ref_genome/chr1.fa is generated by CreateSequenceDictionary below.
wget https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0/Homo_sapiens_assembly38.dict
gatk CreateSequenceDictionary -R ref_genome/chr1.fa

## Add read group info if not present already (no command recorded here).

# Extract the records overlapping the regions in the BED file (chr1 only).
# -b forces BAM output to match the .bam file name, and the options are placed
# before the input file so parsing does not depend on getopt argument permutation.
samtools view -b \
  -L mapping_files/bed_file2.bed \
  -o mapping_files/chr1_bamfile.bam \
  mapping_files/merged_sorted.bam

# BQSR step 1: build the recalibration table from the chr1 reads and known sites.
gatk BaseRecalibrator \
  -I mapping_files/chr1_bamfile.bam \
  -R ref_genome/chr1.fa \
  --known-sites "$dbsnp_vcf" \
  -O recal_data.table
# BQSR step 2: apply the recalibration table to produce the recalibrated BAM.
gatk ApplyBQSR \
  -R ref_genome/chr1.fa \
  -I mapping_files/chr1_bamfile.bam \
  --bqsr-recal-file recal_data.table \
  -O mapping_files/chr1_bqsr.bam
# Index the recalibrated BAM.
samtools index mapping_files/chr1_bqsr.bam