-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf_combineVariants-format-compare.py
executable file
·42 lines (34 loc) · 1.72 KB
/
vcf_combineVariants-format-compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
from itertools import *
from VcfFile import *
from VcfSampleEval import *
from optparse import OptionParser
from common import grouper
from common import typeofGenotype
import os
def main():
    """Print the value of one FORMAT tag for every genotype of a merged VCF.

    Command line: ``[options] file.vcf``. The first positional argument is the
    path of the VCF file to read; any further positional arguments are ignored.

    Output (stdout):
      * one tab-separated line with the list returned by ``getSampleList()``;
      * one tab-separated line per record yielded by
        ``yieldVcfRecordwithGenotypes``, holding the ``--formatTag`` value
        (default ``GT``) of each genotype, taken two at a time.

    Exits via ``parser.error`` (usage message on stderr, status 2) when no VCF
    path is given, instead of failing with an IndexError.
    """
    usage = "usage: %prog [options] file.vcf \n output format values from genotype data field in a merged VCF generated by CombineVariants from GATK for suitabale plotting/dataviz"
    parser = OptionParser(usage)
    # NOTE(review): --includeRef and --includeFilter are accepted but never
    # read below; they are kept so existing command lines keep working.
    parser.add_option("--includeRef", action="store_true", dest="includeRef", help="include sites in the set ReferenceInAll", default=False)
    parser.add_option("--includeFilter", action="store_true", dest="includeFilter", help="include site filtered or not!", default=False)
    parser.add_option("--formatTag", dest="format", default="GT", help="format tag to compare (default GT)")
    (options, args) = parser.parse_args()

    # Fail with a usage message rather than an IndexError on a missing path.
    if not args:
        parser.error("missing required argument: file.vcf")
    vcfilename = args[0]

    vcfobj = VcfFile(vcfilename)
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(vcfilename, 'r') as vcfh:
        vcfobj.parseMetaAndHeaderLines(vcfh)
        samples = vcfobj.getSampleList()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\t".join(samples))

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            vrec_ziptuple = vrec.zipGenotypes(samples)
            outputs = []
            # Items are consumed in groups of two; element [1] of each item is
            # the object that exposes getFormatVal().
            # NOTE(review): presumably items are (sample, genotype) tuples and
            # the sample count is even - confirm how grouper pads a short group.
            for (compare_item, eval_item) in grouper(2, vrec_ziptuple):
                compare_genotype = compare_item[1]
                eval_genotype = eval_item[1]
                outputs.append("\t".join([
                    compare_genotype.getFormatVal(options.format),
                    eval_genotype.getFormatVal(options.format),
                ]))
            print("\t".join(outputs))
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()