-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
140 lines (111 loc) · 4.12 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from itertools import *
import collections
def powerset(iterable):
"""powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"""
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
def is_augmentation(original, augmented):
"""
Checks to see if augmented is an augmentation of original. It needs to use all of original's letters
:param original: string
:param augmented: string
:return: bool
"""
counter = collections.Counter(augmented)
for c in original:
counter[c] -= 1
if counter[c] < 0:
return False
return True
class Node:
def __init__(self, label=None, data=None):
self.label = label
self.data = data
self.children = dict()
def add_child(self, key, data=None):
if not isinstance(key, Node):
self.children[key] = Node(key, data)
else:
self.children[key.label] = key
def __getitem__(self, key):
return self.children[key]
class Trie:
def __init__(self):
self.head = Node()
def __getitem__(self, key):
return self.head.children[key]
def add(self, word):
current_node = self.head
word_finished = True
for i in range(len(word)):
if word[i] in current_node.children:
current_node = current_node.children[word[i]]
else:
word_finished = False
break
# For ever new letter, create a new child node
if not word_finished:
while i < len(word):
current_node.add_child(word[i])
current_node = current_node.children[word[i]]
i += 1
# Let's store the full word at the end node so we don't need to
# travel back up the tree to reconstruct the word
current_node.data = word
def has_word(self, word):
if word == '':
return False
if not word:
raise ValueError('Trie.has_word requires a not-Null string')
# Start at the top
current_node = self.head
exists = True
for letter in word:
if letter in current_node.children:
current_node = current_node.children[letter]
else:
exists = False
break
# Still need to check if we just reached a word like 't'
# that isn't actually a full word in our dictionary
if exists:
if not current_node.data:
exists = False
return exists
def start_with_prefix(self, prefix):
""" Returns a list of all words in tree that start with prefix """
words = list()
if not prefix:
raise ValueError('Requires not-Null prefix')
# Determine end-of-prefix node
top_node = self.head
for letter in prefix:
if letter in top_node.children:
top_node = top_node.children[letter]
else:
# Prefix not in tree, go no further
return words
# Get words under prefix
if top_node == self.head:
queue = [node for key, node in top_node.children.iteritems()]
else:
queue = [top_node]
# Perform a breadth first search under the prefix
# A cool effect of using BFS as opposed to DFS is that BFS will return
# a list of words ordered by increasing length
while queue:
current_node = queue.pop()
if current_node.data:
# Isn't it nice to not have to go back up the tree?
words.append(current_node.data)
queue = [node for key, node in current_node.children.iteritems()] + queue
return words
def get_data(self, word):
""" This returns the 'data' of the node identified by the given word """
if not self.has_word(word):
raise ValueError('{} not found in trie'.format(word))
# Race to the bottom, get data
current_node = self.head
for letter in word:
current_node = current_node[letter]
return current_node.data