-
Notifications
You must be signed in to change notification settings - Fork 2
/
virus-hierarchy.py
executable file
·104 lines (76 loc) · 2.75 KB
/
virus-hierarchy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
from __future__ import print_function
import sys
import sqlite3
import argparse
from collections import Counter
# Column width of the left-hand (indent, depth and rank) part of each line
# that descendants() prints.
width = 40

# Command line: a single optional limit on how far down the tree to go.
parser = argparse.ArgumentParser(
    description='Describe the full virus taxonomy.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
    '--maxDepth', type=int, help='The maximum depth to descend.')
args = parser.parse_args()
maxDepth = args.maxDepth  # None when --maxDepth is not given.

# The database file is looked up relative to the current working directory.
# Using sqlite3.Row lets the query helpers read columns by name.
conn = sqlite3.connect('taxonomy.db')
conn.row_factory = sqlite3.Row
class LevelRecorder():
    """
    Remember, for every rank name, the deepest level at which it has been
    seen, the parent rank that went with that level, and how many times
    the rank has been added in total.
    """

    def __init__(self):
        # Maps rank -> (level, parentRank, count).
        self.levels = {}

    def add(self, rank, level, parentRank):
        """
        Record one occurrence of a rank.

        The first occurrence stores the given level and parent rank with a
        count of one. Later occurrences always increase the count; the
        stored level and parent rank are replaced (and a message is written
        to standard error) only when the new level is strictly deeper than
        the stored one.

        rank: The rank name, used as the dictionary key.
        level: The integer depth at which the rank was found.
        parentRank: The rank of the parent node (may be None).
        """
        known = self.levels.get(rank)
        if known is None:
            self.levels[rank] = (level, parentRank, 1)
            return

        deepest, deepestParent, seen = known
        if level > deepest:
            print(
                'Adjusting %s from (level: %d, parent: %s) to '
                '(level: %d parent: %s).' %
                (rank, deepest, deepestParent, level, parentRank),
                file=sys.stderr)
            deepest, deepestParent = level, parentRank
        self.levels[rank] = (deepest, deepestParent, seen + 1)
# Single module-level recorder: descendants() adds to it while walking the
# tree and the level summary at the end of the script reads from it.
levels = LevelRecorder()
def name(taxid, conn):
    """
    Look up the name stored for a taxonomy id.

    taxid: The taxonomy id to look up in the 'names' table.
    conn: An open sqlite3 connection whose rows can be indexed by column
        name (e.g., with row_factory set to sqlite3.Row).

    Returns the 'name' column of the first matching row.

    Raises LookupError if the 'names' table has no row for the taxid.
    """
    row = conn.execute(
        'SELECT name FROM names WHERE taxid = ?', (taxid,)).fetchone()
    if row is None:
        # fetchone() gives None when nothing matched. Fail with a message
        # that identifies the taxid instead of an opaque TypeError from
        # subscripting None.
        raise LookupError('No name found for taxid %r.' % (taxid,))
    return row['name']
def rank(taxid, conn):
    """
    Look up the rank stored for a taxonomy id.

    taxid: The taxonomy id to look up in the 'nodes' table.
    conn: An open sqlite3 connection whose rows can be indexed by column
        name (e.g., with row_factory set to sqlite3.Row).

    Returns the 'rank' column of the first matching row.

    Raises LookupError if the 'nodes' table has no row for the taxid.
    """
    row = conn.execute(
        'SELECT rank FROM nodes WHERE taxid = ?', (taxid,)).fetchone()
    if row is None:
        # fetchone() gives None when nothing matched. Fail with a message
        # that identifies the taxid instead of an opaque TypeError from
        # subscripting None.
        raise LookupError('No rank found for taxid %r.' % (taxid,))
    return row['rank']
def descendants(parentTaxid, parentRank, depth, conn):
    """
    Print one line for the given node, then recurse into each of its
    children, recording every child's rank in the module-level 'levels'
    recorder.

    Nothing is printed (and no query is made) when a maximum depth was
    given and 'depth' exceeds it.

    parentTaxid: The taxonomy id of the node to print and descend from.
    parentRank: The rank label to print for that node.
    depth: The depth of the node, used for indentation and numbering.
    conn: An open sqlite3 connection with name-indexable rows.
    """
    tooDeep = maxDepth is not None and depth > maxDepth
    if tooDeep:
        return

    # Left column: indent, depth number and rank, padded to 'width'.
    label = '%s%d. %s' % (' ' * depth, depth, parentRank)
    print('%-*s %s' % (width, label, name(parentTaxid, conn)))

    childDepth = depth + 1
    children = conn.execute(
        'SELECT taxid, rank FROM nodes WHERE parent_taxid = ?',
        (parentTaxid,)).fetchall()

    for child in children:
        if child['rank'] == 'no rank':
            # Unranked children are labelled after their parent's rank.
            childRank = '-' + parentRank
        else:
            childRank = child['rank']
        descendants(child['taxid'], childRank, childDepth, conn)
        levels.add(childRank, childDepth, parentRank)
# Walk the whole tree from the root taxon, recording the root itself at
# level zero. NOTE(review): 10239 is presumably the NCBI taxonomy id of the
# Viruses node - confirm against the database in use.
rootTaxid = 10239
rootRank = rank(rootTaxid, conn)
levels.add(rootRank, 0, None)
descendants(rootTaxid, rootRank, 0, conn)

print('\nLevel summary\n')

# Group the recorded ranks by the level they ended up at. The loop variable
# is deliberately not called 'rank', so the module-level rank() function is
# not rebound.
ranksByLevel = {}
for rankName, (level, _, count) in levels.levels.items():
    if rankName is not None:
        ranksByLevel.setdefault(level, []).append((rankName, count))

# Print every level that has at least one rank, shallowest first. Iterating
# over the levels actually present (instead of counting up from zero until
# an empty level is met) means a gap left by ranks that were adjusted to a
# deeper level cannot cut the summary short.
for thisLevel in sorted(ranksByLevel):
    for rankName, count in ranksByLevel[thisLevel]:
        print('%s%d: %s (%d)' % (
            ' ' * thisLevel, thisLevel, rankName, count))