|
| 1 | +""" |
| 2 | +Trie - Prefix Tree |
| 3 | +
|
| 4 | +Description: A trie (pronounced "try") is a tree-based data structure that stores a dynamic |
| 5 | +set of keys, which are usually strings. Unlike a binary search tree, nodes are not |
| 6 | +associated with specific keys; instead, a node's position in the tree defines the key it's |
| 7 | +associated with. Each node represents a single character, and paths from root to nodes form |
| 8 | +complete words or prefixes. |
| 9 | +
|
| 10 | +The trie excels at prefix-based operations and is particularly useful for: |
| 11 | +- Autocomplete functionality in search bars and text editors |
| 12 | +- Spell checking and word validation |
| 13 | +- IP routing tables (longest prefix matching) |
| 14 | +- Dictionary implementations and word games |
| 15 | +- T9 predictive text input |
| 16 | +- Genome sequence analysis |
| 17 | +
|
| 18 | +How it works: |
| 19 | +- Insert: Traverse the trie character by character, creating new nodes as needed. Mark the |
| 20 | + final node as end-of-word |
| 21 | +- Search: Traverse the trie following the characters. Return true only if we reach a node |
| 22 | + marked as end-of-word |
| 23 | +- Prefix Search: Similar to search but return true if we can traverse all characters, |
| 24 | + regardless of end-of-word marker |
| 25 | +- Delete: Recursively remove nodes bottom-up, only deleting nodes that have no other children |
| 26 | + and aren't end-of-word markers for other words |
| 27 | +- Autocomplete: Find the node for a given prefix, then recursively collect all words from |
| 28 | + that point |
| 29 | +
|
| 30 | +Time Complexity: |
| 31 | +- Insert: O(m) - visits each character once, where m is word length |
| 32 | +- Search: O(m) - visits each character once, where m is word length |
| 33 | +- Prefix Search: O(m) - visits each character once, where m is prefix length |
| 34 | +- Delete: O(m) - visits each character once, where m is word length |
| 35 | +- Autocomplete: O(p + n) - O(p) to find the prefix node (p is prefix length), O(n) to visit the n nodes under it and collect all matching words |
| 36 | +
|
| 37 | +Space Complexity: O(ALPHABET_SIZE × N × M) - each node can have up to ALPHABET_SIZE children, |
| 38 | +with N total words of average length M. In practice, space usage is much lower due to shared |
| 39 | +prefixes. Worst case is O(26 × N × M) for lowercase English alphabet. |
| 40 | +""" |
| 41 | + |
| 42 | + |
class TrieNode:
    """One node of the trie; the edge leading to it carries a single character."""

    def __init__(self) -> None:
        # Outgoing edges: maps the next character to the child node.
        self.children: dict = {}
        # True when the path from the root down to this node spells a stored word.
        self.is_end_of_word: bool = False
        # Count of words ending at this node.
        self.word_count: int = 0
| 50 | + |
| 51 | + |
class Trie:
    """Prefix tree over non-empty strings with the common trie operations.

    ``total_words`` counts *distinct* stored words. Repeated insertions of the
    same word are tracked on the terminal node's ``word_count``; the word only
    disappears once ``delete`` has been called as many times as ``insert``.

    All traversals are iterative, so very long words cannot exhaust the
    interpreter's recursion limit.
    """

    def __init__(self):
        """Initialize the trie with an empty root node."""
        self.root = TrieNode()
        self.total_words = 0  # number of distinct words currently stored

    def insert(self, word: str) -> None:
        """Insert a word into the trie.

        Empty strings are ignored.

        Args:
            word: String to insert

        Time Complexity: O(m) where m is length of word
        """
        if not word:
            return

        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]

        # Only the first insertion of a word adds to the distinct-word total;
        # every insertion is still recorded in word_count.
        if not node.is_end_of_word:
            self.total_words += 1
            node.is_end_of_word = True
        node.word_count += 1

    def search(self, word: str) -> bool:
        """Search for a complete word in the trie.

        Args:
            word: String to search for

        Returns:
            True if word exists in trie, False otherwise

        Time Complexity: O(m) where m is length of word
        """
        node = self._find_node(word)
        return node is not None and node.is_end_of_word

    def starts_with(self, prefix: str) -> bool:
        """Check whether the path spelled by ``prefix`` exists in the trie.

        An empty prefix resolves to the root node, so it yields True even when
        no words are stored.

        Args:
            prefix: Prefix string to check

        Returns:
            True if the prefix path exists, False otherwise

        Time Complexity: O(m) where m is length of prefix
        """
        return self._find_node(prefix) is not None

    def delete(self, word: str) -> bool:
        """Delete one occurrence of a word from the trie.

        If the word was inserted several times, each call removes a single
        occurrence; the word stays searchable until the last one is removed.
        Nodes that no longer lead to any stored word are pruned.

        Args:
            word: String to delete

        Returns:
            True if an occurrence of word was removed, False if the word is
            empty or was not stored

        Time Complexity: O(m) where m is length of word
        """
        if not word:
            return False

        # Walk down to the terminal node, remembering (parent, char) for each
        # edge so unused nodes can be pruned bottom-up afterwards.
        path = []
        node = self.root
        for char in word:
            child = node.children.get(char)
            if child is None:
                return False
            path.append((node, char))
            node = child

        if not node.is_end_of_word:
            return False

        node.word_count -= 1
        if node.word_count > 0:
            # Other insertions of the same word remain: keep the node intact.
            return True

        node.is_end_of_word = False
        self.total_words -= 1

        # Remove trailing nodes that have no children and end no word; stop at
        # the first node that is still needed by another word.
        for parent, char in reversed(path):
            child = parent.children[char]
            if child.children or child.is_end_of_word:
                break
            del parent.children[char]

        return True

    def get_all_words_with_prefix(self, prefix: str) -> list:
        """Get all words in the trie that start with the given prefix.

        Args:
            prefix: Prefix string

        Returns:
            List of all words with given prefix (empty if the prefix is absent)

        Time Complexity: O(p + n) where p is prefix length and n is number of nodes under prefix
        """
        node = self._find_node(prefix)
        if node is None:
            return []
        return list(self._iter_words(node, prefix))

    def autocomplete(self, prefix: str, limit: int = 10) -> list:
        """Get autocomplete suggestions for a given prefix.

        Traversal stops as soon as ``limit`` words have been found instead of
        collecting every match first.

        Args:
            prefix: Prefix string
            limit: Maximum number of suggestions to return; a value of zero or
                less yields an empty list

        Returns:
            List of up to 'limit' words that start with prefix
        """
        if limit <= 0:
            return []

        node = self._find_node(prefix)
        if node is None:
            return []

        suggestions = []
        for word in self._iter_words(node, prefix):
            suggestions.append(word)
            if len(suggestions) >= limit:
                break
        return suggestions

    def count_words_with_prefix(self, prefix: str) -> int:
        """Count how many distinct words start with the given prefix.

        Args:
            prefix: Prefix string

        Returns:
            Number of words with given prefix
        """
        node = self._find_node(prefix)
        if node is None:
            return 0

        count = [0]
        self._count_words(node, count)
        return count[0]

    def longest_common_prefix(self) -> str:
        """Find the longest common prefix of all words in the trie.

        Returns:
            Longest common prefix string ("" for an empty trie)
        """
        if not self.root.children:
            return ""

        prefix = []
        node = self.root

        # Follow the path while it is unambiguous (exactly one child) and no
        # stored word ends at the current node.
        while len(node.children) == 1 and not node.is_end_of_word:
            char = next(iter(node.children))
            prefix.append(char)
            node = node.children[char]

        return ''.join(prefix)

    def get_all_words(self) -> list:
        """Get all words stored in the trie.

        Returns:
            List of all words in the trie
        """
        return list(self._iter_words(self.root, ""))

    def is_empty(self) -> bool:
        """Check if trie is empty.

        Returns:
            True if trie has no words, False otherwise
        """
        return self.total_words == 0

    def size(self) -> int:
        """Get the number of distinct words in the trie.

        Returns:
            Number of words stored
        """
        return self.total_words

    def clear(self) -> None:
        """Remove all words from the trie."""
        self.root = TrieNode()
        self.total_words = 0

    # Helper methods

    def _find_node(self, word: str) -> "TrieNode | None":
        """Find the node corresponding to the last character of word.

        Args:
            word: String to find (an empty string resolves to the root)

        Returns:
            TrieNode if found, None otherwise
        """
        node = self.root
        for char in word:
            node = node.children.get(char)
            if node is None:
                return None
        return node

    def _iter_words(self, node: "TrieNode", prefix: str):
        """Lazily yield every word stored at or below ``node``.

        Words come out in depth-first pre-order, visiting children in the
        order their edges were created.

        Args:
            node: Node to start from
            prefix: String spelled by the path from the root to ``node``

        Yields:
            Each stored word, as ``prefix`` plus the characters below ``node``
        """
        stack = [(node, prefix)]
        while stack:
            current, text = stack.pop()
            if current.is_end_of_word:
                yield text
            # Push children reversed so the first-created child is popped first.
            for char, child in reversed(list(current.children.items())):
                stack.append((child, text + char))

    def _collect_all_words(self, node: "TrieNode", prefix: str, words: list) -> None:
        """Append all words found at or below the given node to ``words``.

        Args:
            node: Current node
            prefix: Current prefix
            words: List to collect words into
        """
        words.extend(self._iter_words(node, prefix))

    def _count_words(self, node: "TrieNode", count: list) -> None:
        """Add the number of words at or below the given node to ``count[0]``.

        Args:
            node: Current node
            count: List containing single count value
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.is_end_of_word:
                count[0] += 1
            pending.extend(current.children.values())
| 292 | + |
| 293 | + |
# Demonstration: exercises the main Trie operations and prints the results.
if __name__ == "__main__":
    trie = Trie()

    # Populate the trie with a small vocabulary sharing the prefixes "app" and "ban".
    vocabulary = ["apple", "app", "application", "apply", "banana", "band", "bandana"]
    print("Inserting words:", vocabulary)
    for entry in vocabulary:
        trie.insert(entry)

    print(f"\nTotal words in trie: {trie.size()}")

    # Exact-word lookups: "apple" and "app" were inserted, "appl" is only a prefix.
    print("\n--- Search Operations ---")
    for candidate in ("apple", "app", "appl"):
        print(f"Search '{candidate}': {trie.search(candidate)}")

    # Prefix checks: "app" and "ban" are prefixes of inserted words, "cat" is not.
    print("\n--- Prefix Operations ---")
    for stem in ("app", "ban", "cat"):
        print(f"Starts with '{stem}': {trie.starts_with(stem)}")

    # Full listings for each prefix, followed by a capped suggestion list.
    print("\n--- Autocomplete ---")
    for stem in ("app", "ban"):
        print(f"Words with prefix '{stem}': {trie.get_all_words_with_prefix(stem)}")

    print(f"\nAutocomplete for 'app' (limit 3): {trie.autocomplete('app', 3)}")

    # How many stored words fall under each prefix.
    print("\n--- Count Operations ---")
    for stem in ("app", "ban"):
        print(f"Count words with prefix '{stem}': {trie.count_words_with_prefix(stem)}")

    print(f"\nAll words in trie: {trie.get_all_words()}")

    # Remove "app" and confirm the longer word "apple" is still reachable.
    print("\n--- Delete Operations ---")
    print(f"Delete 'app': {trie.delete('app')}")
    print(f"Search 'app' after deletion: {trie.search('app')}")
    print(f"Search 'apple' after deleting 'app': {trie.search('apple')}")
    print(f"Total words after deletion: {trie.size()}")

    # Longest common prefix on a separate trie.
    trie2 = Trie()
    for entry in ("flower", "flow", "flight"):
        trie2.insert(entry)
    print(f"\nLongest common prefix: '{trie2.longest_common_prefix()}'")
0 commit comments