|
| 1 | +/** |
| 2 | + * Algorithm: KMP (Knuth-Morris-Pratt) Pattern Matching Algorithm |
| 3 | + * Description: Efficient string searching algorithm that finds occurrences of a pattern |
| 4 | + * within a text string. Uses preprocessing to avoid unnecessary comparisons |
| 5 | + * by utilizing information from previous match attempts. |
| 6 | + * |
| 7 | + * Time Complexity: O(n + m) where n = text length, m = pattern length |
| 8 | + * Space Complexity: O(m) for the LPS (Longest Proper Prefix which is also Suffix) array |
| 9 | + * |
| 10 | + * Advantages over naive pattern matching: |
| 11 | + * - Linear time complexity instead of O(n*m) |
| 12 | + * - Never backtracks in the text string |
| 13 | + * - Efficient for patterns with repeating sub-patterns |
| 14 | + * - Preprocessing step enables smart shifting of pattern |
| 15 | + * |
| 16 | + * Use Cases: |
| 17 | + * - Text editors (find and replace functionality) |
| 18 | + * - DNA sequence matching in bioinformatics |
| 19 | + * - Plagiarism detection systems |
| 20 | + * - Search engines and text processing |
| 21 | + * - Network intrusion detection systems |
| 22 | + * |
| 23 | + * @author DSA_Code Contributors |
| 24 | + * @version 1.0 |
| 25 | + * @since 2025-10-09 |
| 26 | + */ |
| 27 | + |
| 28 | +import java.util.ArrayList; |
| 29 | +import java.util.Arrays; |
| 30 | +import java.util.List; |
| 31 | + |
public class KMPAlgorithm {

    /**
     * Searches for all occurrences of a pattern in the given text using the KMP algorithm.
     * Overlapping occurrences are reported (e.g. "AA" in "AAAA" yields 0, 1 and 2).
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return List of starting indices where pattern is found (empty list if not found)
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static List<Integer> search(String text, String pattern) {
        return findMatches(text, pattern, Integer.MAX_VALUE);
    }

    /**
     * Shared KMP scan behind every public entry point.
     * Stops as soon as {@code limit} occurrences have been recorded, so callers that only
     * need the first match do not pay for scanning the rest of the text.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @param limit maximum number of occurrences to collect (must be at least 1)
     * @return starting indices of the matches found, in ascending order
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    private static List<Integer> findMatches(String text, String pattern, int limit) {
        // Input validation
        if (text == null || pattern == null) {
            throw new IllegalArgumentException("Text and pattern cannot be null");
        }
        if (pattern.isEmpty()) {
            throw new IllegalArgumentException("Pattern cannot be empty");
        }

        List<Integer> occurrences = new ArrayList<>();
        int n = text.length();
        int m = pattern.length();

        // A pattern longer than the text can never match
        if (m > n) {
            return occurrences;
        }

        int[] lps = computeLPS(pattern);

        // 'matched' = number of pattern characters currently matched against the text
        // ending just before position i. Invariant: 0 <= matched < m at the top of the loop.
        int matched = 0;
        for (int i = 0; i < n; i++) {
            char c = text.charAt(i);

            // On mismatch, fall back to the next shorter border instead of rewinding the text
            while (matched > 0 && c != pattern.charAt(matched)) {
                matched = lps[matched - 1];
            }
            if (c == pattern.charAt(matched)) {
                matched++;
            }

            if (matched == m) {
                occurrences.add(i - m + 1); // Record starting index
                if (occurrences.size() >= limit) {
                    break;
                }
                // Keep the longest border so overlapping occurrences are still found
                matched = lps[matched - 1];
            }
        }

        return occurrences;
    }

    /**
     * Computes the LPS (Longest Proper Prefix which is also Suffix) array for the pattern.
     * This preprocessing step is the key to KMP's efficiency.
     *
     * LPS[i] = length of the longest proper prefix of pattern[0..i] which is also a suffix
     *
     * Example: For pattern "ABABC"
     * - LPS[0] = 0 (base case)
     * - LPS[1] = 0 (no proper prefix)
     * - LPS[2] = 1 ("A" is both prefix and suffix of "ABA")
     * - LPS[3] = 2 ("AB" is both prefix and suffix of "ABAB")
     * - LPS[4] = 0 (no proper prefix of "ABABC" is also a suffix)
     *
     * @param pattern the pattern string to preprocess (an empty pattern yields an empty array)
     * @return LPS array where lps[i] stores length of longest proper prefix which is also suffix
     */
    private static int[] computeLPS(String pattern) {
        int m = pattern.length();
        // Java zero-initialises the array, so lps[0] == 0 needs no explicit store
        // (and an empty pattern safely produces an empty array).
        int[] lps = new int[m];

        int len = 0; // length of the border of pattern[0..i-1]
        for (int i = 1; i < m; i++) {
            char c = pattern.charAt(i);

            // Shrink to the next shorter border until it can be extended by c (or is empty)
            while (len > 0 && c != pattern.charAt(len)) {
                len = lps[len - 1];
            }
            if (c == pattern.charAt(len)) {
                len++;
            }
            lps[i] = len;
        }

        return lps;
    }

    /**
     * Finds the first occurrence of pattern in text.
     * The scan stops at the first match rather than collecting every occurrence.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return index of first occurrence, or -1 if not found
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static int searchFirst(String text, String pattern) {
        List<Integer> first = findMatches(text, pattern, 1);
        return first.isEmpty() ? -1 : first.get(0);
    }

    /**
     * Checks if pattern exists in text.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return true if pattern is found, false otherwise
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static boolean contains(String text, String pattern) {
        return searchFirst(text, pattern) != -1;
    }

    /**
     * Counts total number of occurrences of pattern in text (overlapping matches included).
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return count of occurrences
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static int countOccurrences(String text, String pattern) {
        return search(text, pattern).size();
    }

    /**
     * Prints the text, the pattern, the pattern's LPS array and the search outcome.
     *
     * @param text the text that was searched
     * @param pattern the pattern that was searched for
     * @param occurrences starting indices returned by the search
     */
    private static void printSearchDetails(String text, String pattern, List<Integer> occurrences) {
        System.out.println("Text: \"" + text + "\"");
        System.out.println("Pattern: \"" + pattern + "\"");
        System.out.println("LPS Array: " + Arrays.toString(computeLPS(pattern)));

        if (occurrences.isEmpty()) {
            System.out.println("Result: Pattern not found");
        } else {
            System.out.println("Result: Pattern found at indices: " + occurrences);
            System.out.println("Total occurrences: " + occurrences.size());

            // List each match position on one line (no in-text highlighting is attempted)
            StringBuilder matches = new StringBuilder("Matches: ");
            for (int i = 0; i < occurrences.size(); i++) {
                if (i > 0) {
                    matches.append(", ");
                }
                matches.append("index ").append(occurrences.get(i));
            }
            System.out.println(matches);
        }
        System.out.println();
    }

    /**
     * Runs one demo search, prints its details and verifies the result.
     *
     * @param title heading printed before the case
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @param expected the starting indices the search is expected to return
     * @return true if the search returned exactly the expected indices
     */
    private static boolean runSearchCase(String title, String text, String pattern, int... expected) {
        System.out.println(title);
        List<Integer> actual = search(text, pattern);

        List<Integer> expectedList = new ArrayList<>();
        for (int index : expected) {
            expectedList.add(index);
        }

        boolean ok = actual.equals(expectedList);
        if (!ok) {
            // Printed before the details so the trailing blank line still separates cases
            System.out.println("ERROR: Expected indices " + expectedList);
        }
        printSearchDetails(text, pattern, actual);
        return ok;
    }

    /**
     * Runs one demo case that must be rejected with an IllegalArgumentException.
     *
     * @param title heading printed before the case
     * @param text the text argument passed to search (may be null)
     * @param pattern the pattern argument passed to search (may be null or empty)
     * @return true if search threw IllegalArgumentException
     */
    private static boolean expectRejected(String title, String text, String pattern) {
        System.out.println(title);
        boolean rejected;
        try {
            search(text, pattern);
            System.out.println("ERROR: Should have thrown exception!");
            rejected = false;
        } catch (IllegalArgumentException e) {
            System.out.println("Correctly caught exception: " + e.getMessage());
            rejected = true;
        }
        System.out.println();
        return rejected;
    }

    /**
     * Comprehensive test cases demonstrating KMP algorithm functionality.
     * Each case is checked against its expected result, and the closing banner reports
     * success only if every check passed.
     */
    public static void main(String[] args) {
        System.out.println("=== KMP (Knuth-Morris-Pratt) Pattern Matching Algorithm ===\n");

        boolean allPassed = true;

        allPassed &= runSearchCase("Test Case 1: Simple Pattern Match",
                "ABABDABACDABABCABAB", "ABABCABAB", 10);
        allPassed &= runSearchCase("Test Case 2: Multiple Occurrences",
                "AABAACAADAABAABA", "AABA", 0, 9, 12);
        allPassed &= runSearchCase("Test Case 3: Overlapping Patterns",
                "AAAA", "AA", 0, 1, 2);
        allPassed &= runSearchCase("Test Case 4: Pattern Not Found",
                "ABCDEFGH", "XYZ");
        allPassed &= runSearchCase("Test Case 5: Pattern at Beginning",
                "HELLO WORLD", "HELLO", 0);
        allPassed &= runSearchCase("Test Case 6: Pattern at End",
                "HELLO WORLD", "WORLD", 6);
        allPassed &= runSearchCase("Test Case 7: Single Character Pattern",
                "ABACABAD", "A", 0, 2, 4, 6);
        allPassed &= runSearchCase("Test Case 8: Pattern Equals Text",
                "EXACT", "EXACT", 0);
        // Practical example: locating a motif inside a DNA sequence
        allPassed &= runSearchCase("Test Case 9: DNA Sequence Matching",
                "GCATCGCAGAGAGTATACAGTACG", "AGAGAG", 7);

        // Test Case 10: Utility methods (the search is case-sensitive, so "The" at 0 is skipped)
        System.out.println("Test Case 10: Utility Methods");
        String text10 = "The quick brown fox jumps over the lazy dog";
        String pattern10 = "the";
        int firstIndex = searchFirst(text10, pattern10);
        boolean found = contains(text10, pattern10);
        int total = countOccurrences(text10, pattern10);
        System.out.println("Text: \"" + text10 + "\"");
        System.out.println("Pattern: \"" + pattern10 + "\"");
        System.out.println("First occurrence: " + firstIndex);
        System.out.println("Contains pattern: " + found);
        System.out.println("Total count: " + total);
        System.out.println();
        allPassed &= (firstIndex == 31) && found && (total == 1);

        allPassed &= expectRejected("Test Case 11: Error Handling (Null Text)", null, "pattern");
        allPassed &= expectRejected("Test Case 12: Error Handling (Empty Pattern)", "text", "");

        allPassed &= runSearchCase("Test Case 13: Pattern Longer Than Text",
                "SHORT", "VERYLONGPATTERN");

        System.out.println(allPassed
                ? "=== All tests completed successfully! ==="
                : "=== Some tests FAILED ===");
    }
}
0 commit comments