-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbtreeset.py
386 lines (322 loc) · 12.9 KB
/
btreeset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
#
# B-tree set (Python)
#
# Copyright (c) 2018 Project Nayuki. (MIT License)
# https://www.nayuki.io/page/btree-set
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
# - The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# - The Software is provided "as is", without warranty of any kind, express or
# implied, including but not limited to the warranties of merchantability,
# fitness for a particular purpose and noninfringement. In no event shall the
# authors or copyright holders be liable for any claim, damages or other
# liability, whether in an action of contract, tort or otherwise, arising from,
# out of or in connection with the Software or the use or other dealings in the
# Software.
#
import numbers
class BTreeSet(object):
# The degree is the minimum number of children each non-root internal node must have.
def __init__(self, degree, coll=None):
if not isinstance(degree, numbers.Integral):
raise TypeError()
if degree < 2:
raise ValueError("Degree must be at least 2")
self.minkeys = degree - 1 # At least 1, equal to degree-1
self.maxkeys = degree * 2 - 1 # At least 3, odd number, equal to minkeys*2+1
self.clear()
if coll is not None:
for obj in coll:
self.add(obj)
def __len__(self):
return self.size
def clear(self):
self.root = BTreeSet.Node(self.maxkeys, True)
self.size = 0
def __contains__(self, obj):
# Walk down the tree
node = self.root
while True:
found, index = node.search(obj)
if found:
return True
elif node.is_leaf():
return False
else: # Internal node
node = node.children[index]
def add(self, obj):
# Special preprocessing to split root node
root = self.root
if len(root.keys) == self.maxkeys:
child = root
self.root = root = BTreeSet.Node(self.maxkeys, False) # Increment tree height
root.children.append(child)
root.split_child(self.minkeys, self.maxkeys, 0)
# Walk down the tree
node = root
while True:
# Search for index in current node
assert len(node.keys) < self.maxkeys
assert node is root or len(node.keys) >= self.minkeys
found, index = node.search(obj)
if found:
return # Key already exists in tree
if node.is_leaf(): # Simple insertion into leaf
node.keys.insert(index, obj)
self.size += 1
return # Successfully added
else: # Handle internal node
child = node.children[index]
if len(child.keys) == self.maxkeys: # Split child node
node.split_child(self.minkeys, self.maxkeys, index)
if obj == node.keys[index]:
return # Key already exists in tree
elif obj > node.keys[index]:
child = node.children[index + 1]
node = child
def remove(self, obj):
if not self._remove(obj):
raise KeyError(str(obj))
def discard(self, obj):
self._remove(obj)
# Returns whether an object was removed.
def _remove(self, obj):
# Walk down the tree
root = self.root
found, index = root.search(obj)
node = root
while True:
assert len(node.keys) <= self.maxkeys
assert node is root or len(node.keys) > self.minkeys
if node.is_leaf():
if found: # Simple removal from leaf
node.remove_key(index)
assert self.size > 0
self.size -= 1
return found
else: # Internal node
if found: # Key is stored at current node
left, right = node.children[index : index + 2]
if len(left.keys) > self.minkeys: # Replace key with predecessor
node.keys[index] = left.remove_max(self.minkeys)
assert self.size > 0
self.size -= 1
return True
elif len(right.keys) > self.minkeys:
node.keys[index] = right.remove_min(self.minkeys)
assert self.size > 0
self.size -= 1
return True
else: # Merge key and right node into left node, then recurse
node.merge_children(self.minkeys, index)
if node is root and len(root.keys) == 0:
assert len(root.children) == 1
self.root = root = left # Decrement tree height
node = left
index = self.minkeys # Index known due to merging; no need to search
else: # Key might be found in some child
child = node.ensure_child_remove(self.minkeys, index)
if node is root and len(root.keys) == 0:
assert len(root.children) == 1
self.root = root = root.children[0] # Decrement tree height
node = child
found, index = node.search(obj)
# Note: Not fail-fast on concurrent modification.
def __iter__(self):
# Initialization
stack = []
def push_left_path(node):
while True:
stack.append((node, 0))
if node.is_leaf():
break
node = node.children[0]
push_left_path(self.root)
# Generate elements
while len(stack) > 0:
node, index = stack.pop()
if node.is_leaf():
assert index == 0
for obj in node.keys:
yield obj
else:
yield node.keys[index]
index += 1
if index < len(node.keys):
stack.append((node, index))
push_left_path(node.children[index])
# For unit tests
def check_structure(self):
# Check size and root node properties
size = self.size
root = self.root
if not isinstance(root, BTreeSet.Node) or size < 0 or (size > self.maxkeys and root.is_leaf()) \
or (size <= self.minkeys * 2 and (not root.is_leaf() or len(root.keys) != size)):
raise AssertionError("Invalid size or root type")
# Calculate height by descending into one branch
height = 0
node = root
while not node.is_leaf():
height += 1
node = node.children[0]
# Check all nodes and total size
if root.check_structure(self.minkeys, self.maxkeys, True, height, None, None) != size:
raise AssertionError("Size mismatch")
# ---- Helper class ----
class Node(object):
# -- Constructor --
# Note: Once created, a node's structure never changes between a leaf and internal node.
def __init__(self, maxkeys, leaf):
assert maxkeys >= 3 and maxkeys % 2 == 1
self.keys = [] # Length is in [0, maxkeys] for root node, [minkeys, maxkeys] for all other nodes
self.children = None if leaf else [] # If internal node, then length always equals len(keys)+1
# -- Methods for getting info --
def is_leaf(self):
return self.children is None
# Searches this node's keys list and returns (True, i) if obj equals keys[i],
# otherwise returns (False, i) if children[i] should be explored. For simplicity,
# the implementation uses linear search. It's possible to replace it with binary search for speed.
def search(self, obj):
keys = self.keys
i = 0
while i < len(keys):
if obj == keys[i]:
assert 0 <= i < len(keys)
return (True, i) # Key found
elif obj > keys[i]:
i += 1
else:
break
assert 0 <= i <= len(keys)
return (False, i) # Not found, caller should recurse on child
# -- Methods for insertion --
# For the child node at the given index, this moves the right half of keys and children to a new node,
# and adds the middle key and new child to this node. The left half of child's data is not moved.
def split_child(self, minkeys, maxkeys, index):
assert not self.is_leaf() and 0 <= index <= len(self.keys) < maxkeys
left = self.children[index]
assert len(left.keys) == maxkeys
right = BTreeSet.Node(maxkeys, left.is_leaf())
self.children.insert(index + 1, right)
# Handle children
if not left.is_leaf():
right.children.extend(left.children[minkeys + 1 : ])
del left.children[minkeys + 1 : ]
# Handle keys
self.keys.insert(index, left.keys[minkeys])
right.keys.extend(left.keys[minkeys + 1 : ])
del left.keys[minkeys : ]
# -- Methods for removal --
# Performs modifications to ensure that this node's child at the given index has at least
# minKeys+1 keys in preparation for a single removal. The child may gain a key and subchild
# from its sibling, or it may be merged with a sibling, or nothing needs to be done.
# A reference to the appropriate child is returned, which is helpful if the old child no longer exists.
def ensure_child_remove(self, minkeys, index):
# Preliminaries
assert not self.is_leaf() and 0 <= index < len(self.children)
child = self.children[index]
if len(child.keys) > minkeys: # Already satisfies the condition
return child
assert len(child.keys) == minkeys
# Get siblings
left = self.children[index - 1] if index >= 1 else None
right = self.children[index + 1] if index < len(self.keys) else None
internal = not child.is_leaf()
assert left is not None or right is not None # At least one sibling exists because degree >= 2
assert left is None or left .is_leaf() != internal # Sibling must be same type (internal/leaf) as child
assert right is None or right.is_leaf() != internal # Sibling must be same type (internal/leaf) as child
if left is not None and len(left.keys) > minkeys: # Steal rightmost item from left sibling
if internal:
child.children.insert(0, left.children.pop(-1))
child.keys.insert(0, self.keys[index - 1])
self.keys[index - 1] = left.remove_key(len(left.keys) - 1)
return child
elif right is not None and len(right.keys) > minkeys: # Steal leftmost item from right sibling
if internal:
child.children.append(right.children.pop(0))
child.keys.append(self.keys[index])
self.keys[index] = right.remove_key(0)
return child
elif left is not None: # Merge child into left sibling
self.merge_children(minkeys, index - 1)
return left # This is the only case where the return value is different
elif right is not None: # Merge right sibling into child
self.merge_children(minkeys, index)
return child
else:
raise AssertionError("Impossible condition")
# Merges the child node at index+1 into the child node at index,
# assuming the current node is not empty and both children have minkeys.
def merge_children(self, minkeys, index):
assert not self.is_leaf() and 0 <= index < len(self.keys)
left, right = self.children[index : index + 2]
assert len(left.keys) == len(right.keys) == minkeys
if not left.is_leaf():
left.children.extend(right.children)
del self.children[index + 1]
left.keys.append(self.remove_key(index))
left.keys.extend(right.keys)
# Removes and returns the minimum key among the whole subtree rooted at this node.
# Requires this node to be preprocessed to have at least minkeys+1 keys.
def remove_min(self, minkeys):
node = self
while True:
assert len(node.keys) > minkeys
if node.is_leaf():
return node.remove_key(0)
else:
node = node.ensure_child_remove(minkeys, 0)
# Removes and returns the maximum key among the whole subtree rooted at this node.
# Requires this node to be preprocessed to have at least minkeys+1 keys.
def remove_max(self, minkeys):
node = self
while True:
assert len(node.keys) > minkeys
if node.is_leaf():
return node.remove_key(len(node.keys) - 1)
else:
node = node.ensure_child_remove(minkeys, len(node.children) - 1)
# Removes and returns this node's key at the given index.
def remove_key(self, index):
assert 0 <= index < len(self.keys)
return self.keys.pop(index)
# -- Miscellaneous methods --
# Checks the structure recursively and returns the total number
# of keys in the subtree rooted at this node. For unit tests.
def check_structure(self, minkeys, maxkeys, isroot, leafdepth, min, max):
# Check basic fields
keys = self.keys
numkeys = len(keys)
if self.is_leaf() != (leafdepth == 0):
raise AssertionError("Incorrect leaf/internal node type")
if numkeys > maxkeys:
raise AssertionError("Invalid number of keys")
if isroot and not self.is_leaf() and numkeys == 0:
raise AssertionError("Invalid number of keys")
if not isroot and numkeys < minkeys:
raise AssertionError("Invalid number of keys")
# Check keys for strict increasing order
tempkeys = [min] + keys + [max]
for i in range(len(tempkeys) - 1):
x = tempkeys[i]
y = tempkeys[i + 1]
if x is not None and y is not None and y <= x:
raise AssertionError("Invalid key ordering")
# Check children recursively and count keys in this subtree
count = numkeys
if not self.is_leaf():
if len(self.children) != numkeys + 1:
raise AssertionError("Invalid number of children")
for (i, child) in enumerate(self.children):
# Check children pointers and recurse
if not isinstance(child, BTreeSet.Node):
raise TypeError()
count += child.check_structure(minkeys, maxkeys, False,
leafdepth - 1, tempkeys[i], tempkeys[i + 1])
return count