-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathblock_node.py
177 lines (136 loc) · 4.83 KB
/
block_node.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
from contig_loc import ContigLocation
import copy
class Node:
    """One aligned block, usable as a graph vertex and a linked-list element.

    A node records where a block aligns in the reference and where it sits in
    the assembly, owns a list of edge objects connecting it to other nodes,
    and carries ``prev``/``next`` links so that a final ordering of nodes can
    be walked as a doubly linked list.

    Attributes:
        line_num: Line number of the originating alignment record (kept for
            tracing and debugging), stored as an ``int``.
        ref: Location object describing where the node aligns in the
            reference.
        asm: Location object describing where the node currently belongs in
            the assembly.  ``shift_coords`` calls ``asm.shift`` and ``tests``
            calls ``asm.low``/``asm.high``, so it must provide those methods.
        asm_original: Location object for the node's original assembly
            placement.
        prev: Preceding node in the linked list, or ``None``.
        next: Following node in the linked list, or ``None``.
        seq: Sequence string associated with the node; starts out empty.
    """

    def __init__(self, line_num, ref_CL, asm_CL, asm_original_CL=None,
                 my_edges=None, p_node=None, n_node=None):
        """Build a node from its reference and assembly locations.

        Args:
            line_num: Line number of the source alignment record; anything
                accepted by ``int()``.
            ref_CL: Location of the node in the reference.  Required.
            asm_CL: Location of the node in the assembly.  Required.
            asm_original_CL: Original assembly location.  When omitted, a
                deep copy of ``asm_CL`` is stored so that later changes to
                ``asm`` do not alter the recorded original.
            my_edges: Optional iterable (list, set, ...) of edges to attach.
            p_node: Optional predecessor node.
            n_node: Optional successor node.

        Raises:
            AssertionError: If ``ref_CL`` or ``asm_CL`` is ``None``.
        """
        # Raised explicitly (rather than via ``assert``) so that the check
        # still runs under ``python -O`` and reports which argument is bad.
        if ref_CL is None:
            raise AssertionError("ref_CL must not be None")
        if asm_CL is None:
            raise AssertionError("asm_CL must not be None")

        self.line_num = int(line_num)
        self.ref = ref_CL
        self.asm = asm_CL

        if asm_original_CL is None:
            # NOTE: because this is a separate object, an identity comparison
            # between asm_original and asm is always False.
            self.asm_original = copy.deepcopy(asm_CL)
        else:
            self.asm_original = asm_original_CL

        if my_edges is None:
            self._edges = []
            # An empty list is trivially sorted.
            self._edges_sorted = True
        else:
            # Copy into a fresh list so any iterable is accepted.
            self._edges = list(my_edges)
            # Safer but slower: the caller's ordering is not trusted.
            self._edges_sorted = False

        self.prev = p_node
        self.next = n_node
        self.seq = ""

    def add_edge(self, edge):
        """Attach ``edge`` to this node and mark the edge list as unsorted."""
        self._edges.append(edge)
        self._edges_sorted = False

    def remove_edge(self, edge):
        """Detach ``edge`` from this node.

        Raises:
            ValueError: If ``edge`` is not attached to this node.
        """
        self._edges.remove(edge)

    def get_edges(self):
        """Return the internal edge list itself (not a copy), in stored order."""
        return self._edges

    def get_sorted_edges(self):
        """Return the internal edge list, sorted by ``edge.edge_low(self)``.

        The sort is done in place and only when edges were added since the
        last sort, which amortizes the cost over repeated calls.
        """
        if not self._edges_sorted:
            self._edges.sort(key=lambda e: e.edge_low(self))
            self._edges_sorted = True
        return self._edges

    def new_edge_endpoints(self, old_node):
        """Re-point this node's edges from ``old_node`` to this node.

        Every edge in this node's list whose ``node1`` and/or ``node2`` is
        ``old_node`` (compared by identity) has that endpoint replaced by
        ``self``.
        """
        for edge in self._edges:
            if edge.node1 is old_node:
                edge.node1 = self
            if edge.node2 is old_node:
                edge.node2 = self

    def clear(self):
        """Drop every reference held by this node.

        All location, edge, link and sequence attributes are set to ``None``
        (``line_num`` is kept).  The edge list itself becomes ``None``, so the
        edge-related methods must not be called afterwards.
        """
        self.ref = None
        self.asm = None
        self.asm_original = None
        self._edges = None
        self.prev = None
        self.next = None
        self.seq = None

    def shift_coords(self, num):
        """Shift this node's assembly location by ``num`` via ``asm.shift``."""
        self.asm.shift(num)

    def shift_edges(self, num):
        """Call ``edge.shift(self, num)`` on every edge attached to this node."""
        for edge in self._edges:
            edge.shift(self, num)

    def shift(self, num):
        """Shift the assembly location and then all attached edges by ``num``."""
        self.shift_coords(num)
        self.shift_edges(num)

    def printn(self):
        """Print this node's line number."""
        print(self.line_num)

    def __str__(self):
        """Return ``line_num``, ``ref`` and ``asm`` joined by tab characters."""
        return "\t".join((str(self.line_num), str(self.ref), str(self.asm)))

    def print_surround_nodes(self):
        """Print the previous node, this node and the next node, one per line.

        A placeholder message is printed in place of a missing neighbour.
        """
        prev_text = "No prev node" if self.prev is None else str(self.prev)
        next_text = "No next node" if self.next is None else str(self.next)
        print(prev_text)
        print(str(self))
        print(next_text)

    def tests(self):
        """Debug check that each edge's assembly span lies inside this node's.

        Prints this node, then for every attached edge compares the edge's
        assembly interval on this node's side (``asm1`` when this node is
        ``node1``, ``asm2`` when it is ``node2``) against this node's own
        assembly interval.  Edges that stick out on either side are reported
        with ``edge.node_info(self)``.

        Raises:
            AssertionError: If an attached edge has this node as neither
                endpoint, which should never happen during normal execution.
        """
        print(str(self))
        for edge in self._edges:
            # Assembly alignments may be in reverse order, so compare the
            # normalized low/high bounds rather than start/stop.
            block_low = self.asm.low()
            block_high = self.asm.high()

            if self is edge.node1:
                edge_asm = edge.asm1
            elif self is edge.node2:
                edge_asm = edge.asm2
            else:
                # Fail noticeably, and even under ``python -O``.
                raise AssertionError(
                    "edge attached to a node that is neither of its endpoints")

            edge_low = edge_asm.low()
            edge_high = edge_asm.high()

            if block_low > edge_low or block_high < edge_high:
                print(edge.node_info(self))