-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathblast_tab_parser.py
64 lines (46 loc) · 1.76 KB
/
blast_tab_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Keep only the best BLAST hit(s) per locus (first column) of a tab-separated file,
# ranked by bitscore and then by identity %; in case of a tie, all tied hits are kept.
# Usage: python3 blast_tab_parser.py input.tab [output.tab]   (output defaults to best_hits.tab)
import sys
def parse_file(input_file):
    """Find the best hit of every locus in a tab-separated BLAST file.

    The first field (index 0) is the locus, the third (index 2) is used as
    the identity % and the twelfth (index 11) as the bitscore. The best hit
    of a locus is the one with the highest bitscore; among equal bitscores,
    the one with the highest identity.

    Scores are compared numerically. Comparing the raw text would rank
    "98.2" above "102" and "99.5" above "100.00".

    Hits are grouped by locus name, so the rows of a locus do not need to be
    contiguous in the file.

    Args:
        input_file: Path of the tab-separated file to read.

    Returns:
        A dict mapping each locus to ``[identity, bitscore]`` of its best
        hit. Both values are the field text exactly as it appears in the
        file (the bitscore keeps its line ending when it is the last field
        of the line), so they can be matched against re-split lines.
        An empty file yields an empty dict.

    Raises:
        IndexError: A non-blank line has fewer than 12 tab-separated fields.
        ValueError: An identity or bitscore field is not a number.
    """
    # locus -> ((bitscore, identity) as floats, identity text, bitscore text)
    best_by_locus = {}
    with open(input_file, "r") as handle:
        for raw_line in handle:
            if not raw_line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            fields = raw_line.split("\t")
            locus = fields[0]
            identity_text = fields[2]
            bitscore_text = fields[11]
            # Tuple ordering: bitscore decides first, identity breaks ties.
            rank = (float(bitscore_text), float(identity_text))
            current = best_by_locus.get(locus)
            if current is None or rank > current[0]:
                best_by_locus[locus] = (rank, identity_text, bitscore_text)
    return {
        locus: [identity_text, bitscore_text]
        for locus, (_, identity_text, bitscore_text) in best_by_locus.items()
    }
def write_output(input_file, saveLines, outputFile):
    """Copy the best-hit lines of a tab-separated BLAST file to a new file.

    A line is written, unchanged, when its third field (index 2) equals the
    saved identity of its locus and its twelfth field (index 11) equals the
    saved bitscore. Every line matching both values is kept, so tied hits
    are all written.

    Args:
        input_file: Path of the tab-separated file to read.
        saveLines: Dict mapping a locus (first field) to a sequence whose
            first item is the best identity text and whose second item is
            the best bitscore text.
        outputFile: Path of the file to create or overwrite.

    Raises:
        KeyError: A line's locus has no entry in ``saveLines``.
        IndexError: A non-blank line has fewer than 12 tab-separated fields.
    """
    # Both handles are closed on exit, including when an exception is raised.
    with open(input_file, "r") as source, open(outputFile, "w") as output:
        for raw_line in source:
            if not raw_line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            fields = raw_line.split("\t")
            best = saveLines[fields[0]]
            best_identity = best[0]
            best_bitscore = best[1]
            # The bitscore is usually the last field and so carries the line
            # ending; ignore it on both sides so a final line without a
            # trailing newline still matches.
            same_bitscore = (
                fields[11].rstrip("\r\n") == best_bitscore.rstrip("\r\n")
            )
            if fields[2] == best_identity and same_bitscore:
                output.write(raw_line)
if __name__ == "__main__":
    # Usage: python3 blast_tab_parser.py input.tab [output.tab]
    if len(sys.argv) < 2:
        sys.exit("usage: python3 blast_tab_parser.py input.tab [output.tab]")
    input_path = sys.argv[1]
    # Choose the output path up front instead of catching IndexError around
    # the whole run: a malformed input line also raises IndexError and must
    # not be mistaken for a missing command-line argument.
    output_path = sys.argv[2] if len(sys.argv) > 2 else "best_hits.tab"
    write_output(input_path, parse_file(input_path), output_path)