-
Notifications
You must be signed in to change notification settings - Fork 0
/
splitcoveragfile.py
executable file
·44 lines (36 loc) · 1000 Bytes
/
splitcoveragfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python
import sys, csv, os
"""splitcoveragefile.py This will take the coverage file that from Matt Bendall that has the coverage
across many time points for each contig in many genomes; it splits the data up per genome for the next
steps in the process (with R... hopefully)."""
__author__ = "Sarah Stevens"
__email__ = "[email protected]"
def usage():
    """Print the one-line command-line usage message to standard output."""
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Usage: splitcoveragfile.py inputfile")
# Script body: split a tab-separated coverage table into one file per genome.
#
# The first tab-separated field of every line is the contig identifier, and
# the text before the first '|' in that field is used as the genome name.
# Each genome's lines are written to "<genome>.<tag>.txt" in the current
# directory, preceded by the first line of the input file.
if len(sys.argv) != 2:
    usage()
    sys.exit(2)

covpath = sys.argv[1]

# Take the tag from the file name only, so that dots in the directory part
# of the path (e.g. "./run.1/all.cov") cannot leak into output file names.
name_parts = os.path.basename(covpath).split('.')
if len(name_parts) < 2:
    usage()
    sys.exit("splitcoveragfile.py: input file name must contain a '.' "
             "(the text after the first '.' tags the output files)")
covend = name_parts[1]

# 'rU' requests universal newlines on Python 2; Python 3 does that by default
# in text mode and rejects the 'U' flag from 3.11 onwards.
read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
with open(covpath, read_mode) as covfile:
    cov = covfile.readlines()

glist = []    # genome names, in order of first appearance
groups = {}   # genome name -> list of raw input lines for that genome
for line in cov:
    if not line.strip():
        # A blank line would otherwise become its own "genome" and produce
        # an output file whose name is just a newline character.
        continue
    contig = line.split('\t')[0]
    gname = contig.split('|')[0]
    if gname not in groups:
        glist.append(gname)
        groups[gname] = []
    groups[gname].append(line)

# The first (non-blank) line of the input is repeated at the top of every
# output file; presumably it is the column-header row.
header = groups[glist[0]][0] if glist else ''

for name in glist:
    if name == 'contig':
        # Skip the group whose first field is literally "contig"; presumably
        # this is the header row itself, which is not a genome.
        continue
    with open(name + '.' + covend + '.txt', 'w') as output:
        output.write(header)
        output.writelines(groups[name])