@@ -50,6 +50,15 @@ private LZ78() {
public record Token(int index, char nextChar) {
    // Pure data carrier: index is the dictionary index of the longest matching
    // prefix, nextChar is the character that followed it. The compiler-generated
    // accessors, equals, hashCode and toString are all that is needed.
}
5252
53+ /**
54+ * A node in the dictionary trie structure.
55+ * Each node represents a phrase and can have child nodes for extended phrases.
56+ */
57+ private static class TrieNode {
58+ Map <Character , TrieNode > children = new HashMap <>();
59+ int index = -1 ; // -1 means not assigned yet
60+ }
61+
5362 /**
5463 * Compresses the input text using the LZ78 algorithm.
5564 *
@@ -62,38 +71,33 @@ public static List<Token> compress(String text) {
6271 }
6372
6473 List <Token > compressedOutput = new ArrayList <>();
65- // Dictionary maps string phrases to their assigned index
66- Map <String , Integer > dictionary = new HashMap <>();
67- // Start index from 1, index 0 implicitly represents the empty string prefix
74+ TrieNode root = new TrieNode ();
6875 int nextDictionaryIndex = 1 ;
6976
70- StringBuilder currentPhrase = new StringBuilder () ;
77+ TrieNode currentNode = root ;
7178 int lastMatchedIndex = 0 ;
7279
7380 for (int i = 0 ; i < text .length (); i ++) {
7481 char currentChar = text .charAt (i );
75- currentPhrase .append (currentChar );
76- String phraseStr = currentPhrase .toString ();
7782
78- if (dictionary . containsKey (phraseStr )) {
79- // This phrase exists in dictionary, remember its index and continue
80- lastMatchedIndex = dictionary . get ( phraseStr ) ;
83+ if (currentNode . children . containsKey (currentChar )) {
84+ currentNode = currentNode . children . get ( currentChar );
85+ lastMatchedIndex = currentNode . index ;
8186 } else {
82- // This phrase does NOT exist in dictionary
8387 // Output: (index of longest matching prefix, current character)
8488 compressedOutput .add (new Token (lastMatchedIndex , currentChar ));
8589
86- // Add this new phrase to the dictionary
87- dictionary .put (phraseStr , nextDictionaryIndex ++);
90+ TrieNode newNode = new TrieNode ();
91+ newNode .index = nextDictionaryIndex ++;
92+ currentNode .children .put (currentChar , newNode );
8893
89- // Reset for next iteration
90- currentPhrase .setLength (0 );
94+ currentNode = root ;
9195 lastMatchedIndex = 0 ;
9296 }
9397 }
9498
9599 // Handle remaining phrase at end of input
96- if (! currentPhrase . isEmpty () ) {
100+ if (currentNode != root ) {
97101 compressedOutput .add (new Token (lastMatchedIndex , END_OF_STREAM ));
98102 }
99103
0 commit comments