-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvcf_removeSamples.py
executable file
·58 lines (42 loc) · 1.65 KB
/
vcf_removeSamples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
import gzip
from itertools import *
from VcfFile import *
from VcfSampleEval import *
from optparse import OptionParser
import argparse
import os
def main():
    """Remove samples from a gzip-compressed VCF file and print the result.

    Command line:
        sample [sample ...]   names of the samples to remove (positional)
        -vcf FILE             gzip-compressed VCF file to read (required)

    The header, rebuilt from the reduced sample list, is printed to stdout
    first. Each record then follows, carrying only the genotypes of the
    samples that were not named on the command line.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid,
            including when ``-vcf`` is missing.
    """
    parser = argparse.ArgumentParser(description='remove samples from vcf file')
    parser.add_argument('removesamples', metavar='sample', type=str, nargs='+',
                        help='sample names to remove')
    # required=True: without it a missing -vcf left args.vcfile as None and
    # the script failed inside gzip.open() instead of printing a usage error.
    parser.add_argument('-vcf', dest='vcfile', type=str, required=True,
                        help="vcf file to remove samples from")
    args = parser.parse_args()

    # Membership is tested once per sample per record, so build the lookup
    # set a single time up front.
    remove = frozenset(args.removesamples)

    # The context manager closes the gzip handle even if parsing fails
    # part-way through the file.
    with gzip.open(args.vcfile, 'r') as vcfh:
        vcfobj = VcfFile(args.vcfile)
        vcfobj.parseMetaAndHeaderLines(vcfh)

        # Keep the original sample list: it is still needed below to pair
        # every genotype column with its sample name.
        samples = vcfobj.getSampleList()
        vcfobj.setSampleList([s for s in samples if s not in remove])

        # Single-argument print(...) emits the same text as the Python 2
        # print statement the script was written with.
        print(vcfobj.returnHeader())

        for vrec in vcfobj.yieldVcfRecordwithGenotypes(vcfh):
            # zipGenotypes is unpacked as (sample name, genotype) pairs;
            # retain only the genotypes whose sample was not removed.
            keep_genotypes = [gen_obj
                              for (sample, gen_obj) in vrec.zipGenotypes(samples)
                              if sample not in remove]
            vrec.addGenotypeList(keep_genotypes)
            print(vrec.toStringwithGenotypes())
if __name__ == "__main__":
    # Run the command-line tool only when this file is executed as a script,
    # so importing the module has no side effects.
    main()