Skip to content

Commit 2e5038d

Browse files
committed
feat: Enhance Trie with New Features
- Added `count_words()` to count the total number of words in the Trie.
- Implemented `longest_common_prefix()` to find the longest common prefix among all words.
- Added `autocomplete(prefix)` to provide autocomplete suggestions.
- Implemented `bulk_insert(words)` to insert multiple words at once.
- Added `clear()` method to remove all words from the Trie.
- Implemented `is_empty()` to check if the Trie is empty.
- Added `all_words()` to retrieve all stored words.
- Implemented `shortest_unique_prefix()` to find the shortest unique prefix of every word.
- Added `starts_with(prefix)` to check if any word starts with a given prefix.
- Implemented `longest_word()` to find the longest word in the Trie.
1 parent f4c1677 commit 2e5038d

File tree

2 files changed

+221
-18
lines changed

2 files changed

+221
-18
lines changed

pydatastructs/strings/tests/test_trie.py

+35
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,38 @@ def test_Trie():
4747
for j in range(i + 1):
4848
assert trie_1.is_inserted(prefix_strings_1[j])
4949
assert trie_1.is_present(prefix_strings_1[j])
50+
51+
assert trie_1.count_words() == 3
52+
53+
assert trie_1.longest_common_prefix() == "dict"
54+
55+
assert trie_1.autocomplete("dict") == ["dict", "dicts", "dicts_lists_tuples"]
56+
57+
trie_2 = Trie()
58+
trie_2.bulk_insert(["apple", "app", "apricot", "banana"])
59+
assert trie_2.count_words() == 4
60+
61+
trie_2.clear()
62+
assert trie_2.count_words() == 0
63+
assert trie_2.is_empty()
64+
65+
assert trie_2.is_empty()
66+
67+
trie_3 = Trie()
68+
trie_3.insert("hello")
69+
trie_3.insert("world")
70+
assert sorted(trie_3.all_words()) == ["hello", "world"]
71+
72+
trie_4 = Trie()
73+
trie_4.bulk_insert(["zebra", "dog", "duck", "dove"])
74+
print(trie_4.shortest_unique_prefix())
75+
assert trie_4.shortest_unique_prefix() == {
76+
"zebra": "z",
77+
"dog": "dog",
78+
"duck": "du",
79+
"dove": "dov"
80+
}
81+
assert trie_4.starts_with("do")
82+
assert not trie_4.starts_with("cat")
83+
84+
assert trie_4.longest_word() == "zebra"

pydatastructs/strings/trie.py

+186-18
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,10 @@ class Trie(object):
4949
@classmethod
5050
def methods(cls):
5151
return ['__new__', 'insert', 'is_present', 'delete',
52-
'strings_with_prefix']
52+
'strings_with_prefix', 'count_words', 'longest_common_prefix',
53+
'autocomplete', 'bulk_insert', 'clear', 'is_empty',
54+
'all_words', 'shortest_unique_prefix', 'starts_with',
55+
'longest_word']
5356

5457
def __new__(cls, **kwargs):
5558
raise_if_backend_is_not_python(
@@ -176,26 +179,191 @@ def strings_with_prefix(self, string: str) -> list:
176179
The list of strings with the given prefix.
177180
"""
178181

179-
def _collect(prefix: str, node: TrieNode, strings: list) -> str:
180-
TrieNode_stack = Stack()
181-
TrieNode_stack.append((node, prefix))
182-
while TrieNode_stack:
183-
walk, curr_prefix = TrieNode_stack.pop()
184-
if walk.is_terminal:
185-
strings.append(curr_prefix + walk.char)
186-
for child in walk._children:
187-
TrieNode_stack.append((walk.get_child(child), curr_prefix + walk.char))
182+
def _collect(node: TrieNode, prefix: str, strings: list):
183+
if node.is_terminal:
184+
strings.append(prefix)
185+
for child in node._children:
186+
_collect(node.get_child(child), prefix + child, strings)
188187

189188
strings = []
190-
prefix = ""
191189
walk = self.root
192190
for char in string:
193-
walk = walk.get_child(char)
194-
if walk is None:
191+
if walk.get_child(char) is None:
195192
return strings
196-
prefix += char
197-
if walk.is_terminal:
198-
strings.append(walk.char)
199-
for child in walk._children:
200-
_collect(prefix, walk.get_child(child), strings)
193+
walk = walk.get_child(char)
194+
_collect(walk, string, strings)
201195
return strings
196+
197+
def count_words(self) -> int:
    """
    Returns the total number of words stored in the trie.

    The trie is walked with an explicit stack instead of recursion,
    so a very long word cannot exhaust the interpreter's recursion
    limit.

    Returns
    =======

    count: int
        The number of terminal nodes reachable from the root
        (the root itself included).
    """
    count = 0
    pending = [self.root]
    while pending:
        node = pending.pop()
        if node.is_terminal:
            count += 1
        # Iterating ``_children`` yields the characters that key
        # each child node.
        for char in node._children:
            pending.append(node.get_child(char))
    return count
216+
217+
def longest_common_prefix(self) -> str:
    """
    Returns the longest prefix shared by every word in the trie.

    Starting at the root, the walk descends for as long as the
    current node is not terminal and has exactly one child; the
    characters passed on the way form the result.

    Returns
    =======

    prefix: str
        The longest common prefix. Empty when the root is terminal
        or does not have exactly one child.
    """
    node = self.root
    pieces = []
    while not node.is_terminal and len(node._children) == 1:
        # Exactly one child exists, so unpacking yields its character.
        (only_char,) = node._children
        pieces.append(only_char)
        node = node.get_child(only_char)
    return "".join(pieces)
234+
235+
def autocomplete(self, prefix: str) -> list:
    """
    Suggests completions for the given prefix.

    This is a thin alias that delegates to ``strings_with_prefix``.

    Parameters
    ==========

    prefix: str
        The text that every suggestion must start with.

    Returns
    =======

    suggestions: list
        Whatever ``strings_with_prefix(prefix)`` returns.
    """
    suggestions = self.strings_with_prefix(prefix)
    return suggestions
251+
252+
def bulk_insert(self, words: list) -> None:
    """
    Inserts every word of the given collection into the trie.

    Parameters
    ==========

    words: list
        The words to insert; each one is passed to ``insert`` in
        iteration order.

    Returns
    =======

    None
    """
    for entry in words:
        self.insert(entry)
    return None
269+
270+
def clear(self) -> None:
    """
    Removes all words from the trie.

    The current root is replaced by a newly constructed ``TrieNode``;
    the previous nodes are no longer referenced by this trie.

    Returns
    =======

    None
    """
    fresh_root = TrieNode()
    self.root = fresh_root
280+
281+
def is_empty(self) -> bool:
    """
    Checks whether the trie stores no words.

    Returns
    =======

    bool
        True if the root has no children and is not itself marked
        terminal, False otherwise.
    """
    root = self.root
    # A terminal root represents the empty string as a stored word
    # (``count_words`` counts it), so it must not be reported as empty.
    return not root.is_terminal and not root._children
292+
293+
def all_words(self) -> list:
    """
    Returns every word stored in the trie.

    Implemented by asking ``strings_with_prefix`` for the strings
    that start with the empty prefix.

    Returns
    =======

    words: list
        Whatever ``strings_with_prefix("")`` returns.
    """
    words = self.strings_with_prefix("")
    return words
304+
305+
def shortest_unique_prefix(self) -> dict:
    """
    Finds the shortest unique prefix for each word in the trie.

    The shortest unique prefix of a word is its shortest non-empty
    prefix that no other stored word starts with. A word that is
    itself a prefix of another stored word has no such prefix and is
    mapped to itself.

    Both passes use explicit stacks, so long words do not hit the
    interpreter's recursion limit.

    Returns
    =======

    prefixes: dict
        A dictionary where keys are words and values are their
        shortest unique prefixes.
    """
    # Pass 1: list the nodes so that each parent precedes its children.
    ordered = []
    pending = [self.root]
    while pending:
        node = pending.pop()
        ordered.append(node)
        for char in node._children:
            pending.append(node.get_child(char))

    # Walking that list backwards sees children before their parent,
    # so each node's word count can be summed from its children.
    # Counts are keyed by id() to avoid requiring hashable nodes.
    words_below = {}
    for node in reversed(ordered):
        total = 1 if node.is_terminal else 0
        for char in node._children:
            total += words_below[id(node.get_child(char))]
        words_below[id(node)] = total

    # Pass 2: walk down again, remembering the first non-empty path
    # whose node has exactly one word at or below it.
    prefixes = {}
    pending = [(self.root, "", None)]
    while pending:
        node, word, unique = pending.pop()
        if unique is None and word and words_below[id(node)] == 1:
            unique = word
        if node.is_terminal:
            prefixes[word] = word if unique is None else unique
        # Push in reverse so children are visited in ``_children`` order.
        for char in reversed(list(node._children)):
            pending.append((node.get_child(char), word + char, unique))
    return prefixes
328+
329+
330+
def starts_with(self, prefix: str) -> bool:
    """
    Checks if any word in the trie starts with the given prefix.

    Parameters
    ==========

    prefix: str
        The prefix to look for. The empty prefix matches whenever
        the trie holds at least one node that is terminal or has
        children.

    Returns
    =======

    bool
        True if any word starts with the prefix, False otherwise.
    """
    walk = self.root
    for char in prefix:
        walk = walk.get_child(char)
        if walk is None:
            return False
    # Reaching a node is not enough: it must be a word itself or lead
    # somewhere. Without this check the empty prefix matched on an
    # empty trie.
    # NOTE(review): a node with children is assumed to lead to at
    # least one terminal node -- confirm that deletion prunes
    # word-less branches.
    return bool(walk.is_terminal or walk._children)
351+
352+
def longest_word(self) -> str:
    """
    Finds the longest word stored in the trie.

    The trie is walked depth-first with an explicit stack instead of
    recursion, so a very long word cannot exhaust the interpreter's
    recursion limit. Among words of equal maximal length, the one
    reached first when children are visited in ``_children``
    iteration order is returned.

    Returns
    =======

    word: str
        The longest word in the trie, or the empty string if no
        word is stored.
    """
    longest = ""
    pending = [(self.root, "")]
    while pending:
        node, word = pending.pop()
        # Strict comparison keeps the earliest word on ties.
        if node.is_terminal and len(word) > len(longest):
            longest = word
        # Push in reverse so the first child is popped first,
        # giving the same visiting order as a recursive walk.
        for char in reversed(list(node._children)):
            pending.append((node.get_child(char), word + char))
    return longest

0 commit comments

Comments
 (0)