-
Notifications
You must be signed in to change notification settings - Fork 2
/
repeat_filter.py
45 lines (39 loc) · 1.68 KB
/
repeat_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import argparse
import subprocess
def _strip_vcf_suffix(path):
    """Return *path* without a trailing ``.vcf`` extension, if it has one.

    ``str.rstrip('.vcf')`` must not be used for this: it removes any trailing
    run of the characters ``.``, ``v``, ``c`` and ``f`` rather than the literal
    suffix, so ``'snpeff.vcf'`` would become ``'snpe'``.
    """
    suffix = '.vcf'
    if path.endswith(suffix):
        return path[:-len(suffix)]
    return path


def build_qsub_argv(variants, repeats, reference):
    """Build the argument vector that submits the repeat-filtering job.

    Three commands are handed to ``qsub_gen.py`` through ``-cmd``:

    1. GATK VariantFiltration, masking the regions in *repeats* under the
       mask name ``Repeats`` and writing ``<prefix>.repeatfilter.vcf``.
    2. GATK SelectVariants with ``--excludeFiltered``, writing
       ``<prefix>.repeatfilter.pass.vcf``.
    3. ``rm`` of the intermediate ``<prefix>.repeatfilter.vcf``.

    ``<prefix>`` is *variants* with a trailing ``.vcf`` removed.

    Args:
        variants: Path of the VCF whose variants are filtered.
        repeats: Path of the BED file listing repeat regions.
        reference: Path of the reference genome.

    Returns:
        A list of strings suitable for ``subprocess.call`` without a shell.
    """
    output_prefix = _strip_vcf_suffix(variants)
    filtered_vcf = output_prefix + '.repeatfilter.vcf'
    passed_vcf = output_prefix + '.repeatfilter.pass.vcf'

    # $GATKHOME is deliberately passed through unexpanded (the original
    # escaped it from the submitting shell); presumably it is resolved when
    # the generated job runs -- verify against qsub_gen.py.
    gatk = 'java -Xmx6g -jar $GATKHOME/GenomeAnalysisTK.jar'

    # NOTE(review): paths are interpolated verbatim into these command
    # strings, so paths containing whitespace or shell metacharacters are
    # still unsupported inside the submitted commands.
    var_fil_cmd = ' '.join([
        gatk,
        '-T VariantFiltration',
        '-R ' + reference,
        '-V ' + variants,
        '-o ' + filtered_vcf,
        '--mask ' + repeats,
        '--maskName Repeats',
    ])
    sel_var_cmd = ' '.join([
        gatk,
        '-T SelectVariants',
        '-R ' + reference,
        '-V ' + filtered_vcf,
        '-o ' + passed_vcf,
        '--excludeFiltered',
    ])
    # remove intermediate file
    rm_cmd = 'rm ' + filtered_vcf

    return [
        'python', 'qsub_gen.py',
        '-cmd', var_fil_cmd,
        '-cmd', sel_var_cmd,
        '-cmd', rm_cmd,
        '-o', output_prefix + '.repeatfiltering',
        '-mo', 'gatk', '-mo', 'java',
        '-OM', 'q',
        '-mem', '15', '-rmem', '10',
    ]


def main(argv=None):
    """Parse the command line and submit the repeat-filtering job.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The return code of the ``qsub_gen.py`` invocation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-vcf', '--vcf', help='VCF with variants to filter', required=True)
    parser.add_argument('-bed', '--bed_repeats', help='BED file with repeat regions listed', required=True)
    parser.add_argument('-ref', '--reference', help='Reference genome', required=True)
    args = parser.parse_args(argv)

    # submit job; an argv list (no shell) keeps each argument intact
    return subprocess.call(build_qsub_argv(args.vcf, args.bed_repeats, args.reference))


if __name__ == '__main__':
    # Propagate a failed submission to the caller's exit status.
    raise SystemExit(main())