-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTrie.py
150 lines (119 loc) · 4.39 KB
/
Trie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
Trie implementation using Python dictionary.
"""
import unittest
class Node:
__slots__ = ['label', 'data', 'children', 'words_count'] # save memory
def __init__(self, label=None, data=None):
self.label = label
self.data = data
self.children = dict()
# the number of words this prefix is part of
self.words_count = 0
def addChild(self, key, data=None):
if not isinstance(key, Node):
self.children[key] = Node(key, data)
else:
self.children[key.label] = key
def __getitem__(self, key):
return self.children[key]
class Trie:
def __init__(self):
self.head = Node()
def __getitem__(self, key):
return self.head.children[key]
def add(self, word):
current_node = self.head
word_finished = True
for i in range(len(word)):
if word[i] in current_node.children:
current_node = current_node.children[word[i]]
current_node.words_count += 1
else:
word_finished = False
break
if not word_finished:
while i < len(word):
current_node.addChild(word[i])
current_node = current_node.children[word[i]]
current_node.words_count += 1
i += 1
current_node.data = word
def has_word(self, word):
if word == '':
return False
if word == None:
raise ValueError('Trie.has_word requires a not-Null string')
current_node = self.head
exists = True
for letter in word:
if letter in current_node.children:
current_node = current_node.children[letter]
else:
exists = False
break
if exists:
if current_node.data == None:
exists = False
return exists
def start_with_prefix(self, prefix):
""" Returns a list of all words in tree that start with prefix """
words = list()
if prefix == None:
raise ValueError('Requires not-Null prefix')
top_node = self.head
for letter in prefix:
if letter in top_node.children:
top_node = top_node.children[letter]
else:
return words
if top_node == self.head:
queue = [node for key, node in top_node.children.iteritems()]
else:
queue = [top_node]
# BFS under prefix, BFS will return
# a list of words ordered by increasing length
while queue:
current_node = queue.pop()
if current_node.data != None:
words.append(current_node.data)
queue = [node for key,node in current_node.children.iteritems()] + queue
return words
def count_prefix(self, prefix):
"""
Count the words that have given prefix.
`start_with_prefix` returns list of all such words. This approach is more
efficient when we just need the count of words that share the same prefix.
"""
curr = self.head
for c in prefix:
next_node = curr.children.get(c, None)
if next_node is None:
return 0 # prefix not found
curr = next_node
return curr.words_count
def getData(self, word):
""" This returns the 'data' of the node identified by the given word """
if not self.has_word(word):
raise ValueError('{} not found in trie'.format(word))
current_node = self.head
for letter in word:
current_node = current_node[letter]
return current_node.data
class TestTrie(unittest.TestCase):
def setUp(self):
self.trie = Trie()
words = 'hackerrank hackfest'
for word in words.split():
self.trie.add(word)
def test_has_word(self):
self.assertFalse(self.trie.has_word('hack'))
def test_count_prefix(self):
self.assertEqual(2, self.trie.count_prefix('hack'))
self.assertEqual(0, self.trie.count_prefix('hacky'))
self.assertEqual(1, self.trie.count_prefix('hacke'))
def test_start_with_prefix(self):
self.assertEqual(['hackfest', 'hackerrank'], self.trie.start_with_prefix('hack'))
self.assertEqual([], self.trie.start_with_prefix('hacky'))
if __name__ == '__main__':
unittest.main()