-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpoolBLASTS.py
executable file
·54 lines (45 loc) · 1.33 KB
/
poolBLASTS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/python
import csv
import glob
import os
import re
import sys
"""poolBLASTS.py takes a metagenome BLAST results file and the metagenome
metadata, and pools samples collected in the same month of the same year by
replacing each sample name with a shared year_month label."""
__author__ = "Sarah Stevens"
__email__ = "[email protected]"
def usage():
    """Write the command-line synopsis to stderr and terminate the script.

    Raises:
        SystemExit: always, with exit status 2 (command-line usage error).
    """
    # Diagnostics go to stderr so they never mix with data a caller may be
    # capturing from stdout; sys.stderr.write works on Python 2 and 3 alike.
    sys.stderr.write("Usage: poolBLASTS.py blastfile metadatafile\n")
    sys.exit(2)
# 'U' (universal newlines) is required on Python 2 but is rejected by
# Python 3.11+, where plain text mode already translates newlines.
_READ_MODE = 'rU' if sys.version_info[0] < 3 else 'r'


def load_pools(metadatafile):
    """Read the tab-delimited metadata and group sample names by year_month.

    Column 0 is read as the sample name, column 5 as the year and column 6
    as the month (left-padded with zeros to two characters).  Completely
    blank lines and rows with an empty sample name are skipped.

    Returns:
        A tuple (pool_of, pools).  pool_of maps each sample name to its
        "year_month" label (the first occurrence of a name wins).  pools is
        a list of (label, [sample names]) pairs in first-seen label order.

    Raises:
        IndexError: if a non-blank row has fewer than seven columns.
    """
    pool_of = {}
    members = {}
    label_order = []
    with open(metadatafile, _READ_MODE) as metafile:
        for row in csv.reader(metafile, delimiter='\t'):
            # csv yields [] for blank lines; an empty name cannot be
            # searched for in the BLAST text, so neither is usable.
            if not row or not row[0]:
                continue
            name = row[0]
            label = row[5] + "_" + row[6].zfill(2)
            pool_of.setdefault(name, label)
            if label not in members:
                members[label] = []
                label_order.append(label)
            members[label].append(name)
    return pool_of, [(label, members[label]) for label in label_order]


def make_renamer(pool_of):
    """Return a function that replaces every sample name in a string.

    All names are substituted in a single pass, trying longer names first,
    so a name that is a prefix or substring of another (e.g. "S1" and
    "S10") cannot corrupt it, and an inserted label is never rewritten by
    a later sample name.
    """
    if not pool_of:
        return lambda text: text
    longest_first = sorted(pool_of, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(name) for name in longest_first))

    def rename(text):
        return pattern.sub(lambda match: pool_of[match.group(0)], text)

    return rename


def pool_blasts(blastfile, metadatafile,
                singles_path='1metaMonYears.txt', pools_path='pools.txt'):
    """Relabel samples in a BLAST file by year_month and write the reports.

    Writes three files:
      * blastfile + '.pooled' -- the BLAST text with each sample name
        replaced by its year_month label;
      * singles_path -- "label<TAB>sample" for every pool that contains
        exactly one sample;
      * pools_path -- "sample<TAB>label" for every sample, to match samples
        to pools easily.
    """
    pool_of, pools = load_pools(metadatafile)
    rename = make_renamer(pool_of)

    # Stream line by line so the whole BLAST file is never held in memory.
    with open(blastfile, _READ_MODE) as blast_in:
        with open(blastfile + '.pooled', 'w') as pooled_out:
            for line in blast_in:
                pooled_out.write(rename(line))

    with open(singles_path, 'w') as singles_out:
        with open(pools_path, 'w') as pools_out:
            for label, samples in pools:
                if len(samples) == 1:
                    singles_out.write(label + '\t' + samples[0] + '\n')
                for sample in samples:
                    pools_out.write(sample + '\t' + label + '\n')


def main(argv):
    """Validate the command line and run the pooling."""
    if len(argv) != 3:
        usage()  # exits with status 2
    pool_blasts(argv[1], argv[2])


if __name__ == "__main__":
    main(sys.argv)