Skip to content

Commit a9d5960

Browse files
Merge pull request #591 from Pasan11504/feature/add-kmp-algorithm-java
Add KMP (Knuth-Morris-Pratt) Pattern Matching Algorithm in Java
2 parents 18bafe3 + a6d797b commit a9d5960

File tree

1 file changed

+310
-0
lines changed

1 file changed

+310
-0
lines changed
Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
/**
2+
* Algorithm: KMP (Knuth-Morris-Pratt) Pattern Matching Algorithm
3+
* Description: Efficient string searching algorithm that finds occurrences of a pattern
4+
* within a text string. Uses preprocessing to avoid unnecessary comparisons
5+
* by utilizing information from previous match attempts.
6+
*
7+
* Time Complexity: O(n + m) where n = text length, m = pattern length
8+
* Space Complexity: O(m) for the LPS (Longest Proper Prefix which is also Suffix) array
9+
*
10+
* Advantages over naive pattern matching:
11+
* - Linear time complexity instead of O(n*m)
12+
* - Never backtracks in the text string
13+
* - Efficient for patterns with repeating sub-patterns
14+
* - Preprocessing step enables smart shifting of pattern
15+
*
16+
* Use Cases:
17+
* - Text editors (find and replace functionality)
18+
* - DNA sequence matching in bioinformatics
19+
* - Plagiarism detection systems
20+
* - Search engines and text processing
21+
* - Network intrusion detection systems
22+
*
23+
* @author DSA_Code Contributors
24+
* @version 1.0
25+
* @since 2025-10-09
26+
*/
27+
28+
import java.util.ArrayList;
29+
import java.util.Arrays;
30+
import java.util.List;
31+
32+
/**
 * Knuth-Morris-Pratt substring search over {@link String} inputs.
 *
 * <p>All methods are static; the class is a non-instantiable utility holder.
 * Comparison is done per {@code char} (UTF-16 code unit) and is case-sensitive.
 */
public final class KMPAlgorithm {

    /** Not instantiable: this class only exposes static helpers. */
    private KMPAlgorithm() {
    }

    /**
     * Searches for all occurrences (including overlapping ones) of a pattern in the given text.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return mutable list of starting indices, in ascending order (empty if not found)
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static List<Integer> search(String text, String pattern) {
        validate(text, pattern);
        return findMatches(text, pattern, Integer.MAX_VALUE);
    }

    /**
     * Rejects null arguments and an empty pattern.
     *
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    private static void validate(String text, String pattern) {
        if (text == null || pattern == null) {
            throw new IllegalArgumentException("Text and pattern cannot be null");
        }
        if (pattern.isEmpty()) {
            throw new IllegalArgumentException("Pattern cannot be empty");
        }
    }

    /**
     * Core KMP scan. Collects match start indices and stops as soon as
     * {@code limit} matches have been recorded, so callers that only need the
     * first hit do not pay for scanning the rest of the text.
     *
     * @param text non-null text
     * @param pattern non-null, non-empty pattern
     * @param limit maximum number of matches to collect (must be at least 1)
     * @return list of match start indices, at most {@code limit} long
     */
    private static List<Integer> findMatches(String text, String pattern, int limit) {
        List<Integer> occurrences = new ArrayList<>();
        int n = text.length();
        int m = pattern.length();

        // A pattern longer than the text can never match.
        if (m > n) {
            return occurrences;
        }

        int[] lps = computeLPS(pattern);
        int matched = 0; // number of pattern characters currently matched

        for (int i = 0; i < n; i++) {
            char current = text.charAt(i);

            // On mismatch fall back through the LPS table instead of rewinding the text index.
            while (matched > 0 && current != pattern.charAt(matched)) {
                matched = lps[matched - 1];
            }
            if (current == pattern.charAt(matched)) {
                matched++;
            }

            if (matched == m) {
                occurrences.add(i - m + 1);
                if (occurrences.size() >= limit) {
                    return occurrences;
                }
                // Keep the longest border so overlapping matches are still found.
                matched = lps[m - 1];
            }
        }

        return occurrences;
    }

    /**
     * Computes the LPS (Longest Proper Prefix which is also Suffix) array for the pattern.
     *
     * <p>{@code lps[i]} is the length of the longest proper prefix of
     * {@code pattern[0..i]} that is also a suffix of it.
     *
     * <p>Example for pattern "ABABC":
     * <ul>
     *   <li>lps[0] = 0 (a single character has no non-empty proper prefix)</li>
     *   <li>lps[1] = 0 (no proper prefix of "AB" is also a suffix)</li>
     *   <li>lps[2] = 1 ("A" is both prefix and suffix of "ABA")</li>
     *   <li>lps[3] = 2 ("AB" is both prefix and suffix of "ABAB")</li>
     *   <li>lps[4] = 0 (no proper prefix of "ABABC" is also a suffix)</li>
     * </ul>
     *
     * @param pattern the non-empty pattern string to preprocess
     * @return the LPS array, same length as the pattern
     */
    private static int[] computeLPS(String pattern) {
        int m = pattern.length();
        int[] lps = new int[m]; // lps[0] stays 0 by default

        int len = 0; // length of the border of pattern[0..i-1]
        for (int i = 1; i < m; i++) {
            char current = pattern.charAt(i);

            // Shrink to the next shorter border until it can be extended (or is empty).
            while (len > 0 && current != pattern.charAt(len)) {
                len = lps[len - 1];
            }
            if (current == pattern.charAt(len)) {
                len++;
            }
            lps[i] = len;
        }

        return lps;
    }

    /**
     * Finds the first occurrence of pattern in text, stopping the scan at the first match.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return index of first occurrence, or -1 if not found
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static int searchFirst(String text, String pattern) {
        validate(text, pattern);
        List<Integer> first = findMatches(text, pattern, 1);
        return first.isEmpty() ? -1 : first.get(0);
    }

    /**
     * Checks if pattern exists in text.
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return true if pattern is found, false otherwise
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static boolean contains(String text, String pattern) {
        return searchFirst(text, pattern) != -1;
    }

    /**
     * Counts total number of occurrences of pattern in text (overlapping matches are counted).
     *
     * @param text the text string to search in
     * @param pattern the pattern string to search for
     * @return count of occurrences
     * @throws IllegalArgumentException if text or pattern is null or pattern is empty
     */
    public static int countOccurrences(String text, String pattern) {
        return search(text, pattern).size();
    }

    /**
     * Prints the text, the pattern, its LPS array and the search result to standard output.
     *
     * @param text the text that was searched
     * @param pattern the pattern that was searched for
     * @param occurrences match start indices as returned by {@link #search(String, String)}
     */
    private static void printSearchDetails(String text, String pattern, List<Integer> occurrences) {
        System.out.println("Text: \"" + text + "\"");
        System.out.println("Pattern: \"" + pattern + "\"");
        System.out.println("LPS Array: " + Arrays.toString(computeLPS(pattern)));

        if (occurrences.isEmpty()) {
            System.out.println("Result: Pattern not found");
        } else {
            System.out.println("Result: Pattern found at indices: " + occurrences);
            System.out.println("Total occurrences: " + occurrences.size());

            // List each match position individually (indices only; the text itself is not marked up).
            System.out.print("Matches: ");
            for (int i = 0; i < occurrences.size(); i++) {
                if (i > 0) {
                    System.out.print(", ");
                }
                System.out.print("index " + occurrences.get(i));
            }
            System.out.println();
        }
        System.out.println();
    }

    /**
     * Demo driver: runs a series of sample searches and prints their results.
     * Results are printed for inspection only; nothing is asserted.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("=== KMP (Knuth-Morris-Pratt) Pattern Matching Algorithm ===\n");

        // Test Case 1: Simple pattern matching
        System.out.println("Test Case 1: Simple Pattern Match");
        String text1 = "ABABDABACDABABCABAB";
        String pattern1 = "ABABCABAB";
        List<Integer> result1 = search(text1, pattern1);
        printSearchDetails(text1, pattern1, result1);

        // Test Case 2: Multiple occurrences
        System.out.println("Test Case 2: Multiple Occurrences");
        String text2 = "AABAACAADAABAABA";
        String pattern2 = "AABA";
        List<Integer> result2 = search(text2, pattern2);
        printSearchDetails(text2, pattern2, result2);

        // Test Case 3: Overlapping patterns
        System.out.println("Test Case 3: Overlapping Patterns");
        String text3 = "AAAA";
        String pattern3 = "AA";
        List<Integer> result3 = search(text3, pattern3);
        printSearchDetails(text3, pattern3, result3);

        // Test Case 4: Pattern not found
        System.out.println("Test Case 4: Pattern Not Found");
        String text4 = "ABCDEFGH";
        String pattern4 = "XYZ";
        List<Integer> result4 = search(text4, pattern4);
        printSearchDetails(text4, pattern4, result4);

        // Test Case 5: Pattern at the beginning
        System.out.println("Test Case 5: Pattern at Beginning");
        String text5 = "HELLO WORLD";
        String pattern5 = "HELLO";
        List<Integer> result5 = search(text5, pattern5);
        printSearchDetails(text5, pattern5, result5);

        // Test Case 6: Pattern at the end
        System.out.println("Test Case 6: Pattern at End");
        String text6 = "HELLO WORLD";
        String pattern6 = "WORLD";
        List<Integer> result6 = search(text6, pattern6);
        printSearchDetails(text6, pattern6, result6);

        // Test Case 7: Single character pattern
        System.out.println("Test Case 7: Single Character Pattern");
        String text7 = "ABACABAD";
        String pattern7 = "A";
        List<Integer> result7 = search(text7, pattern7);
        printSearchDetails(text7, pattern7, result7);

        // Test Case 8: Pattern equals text
        System.out.println("Test Case 8: Pattern Equals Text");
        String text8 = "EXACT";
        String pattern8 = "EXACT";
        List<Integer> result8 = search(text8, pattern8);
        printSearchDetails(text8, pattern8, result8);

        // Test Case 9: DNA sequence matching (practical example)
        System.out.println("Test Case 9: DNA Sequence Matching");
        String dna = "GCATCGCAGAGAGTATACAGTACG";
        String sequence = "AGAGAG";
        List<Integer> dnaResult = search(dna, sequence);
        printSearchDetails(dna, sequence, dnaResult);

        // Test Case 10: Utility methods (matching is case-sensitive, so "The" is not a hit)
        System.out.println("Test Case 10: Utility Methods");
        String text10 = "The quick brown fox jumps over the lazy dog";
        String pattern10 = "the";
        System.out.println("Text: \"" + text10 + "\"");
        System.out.println("Pattern: \"" + pattern10 + "\"");
        System.out.println("First occurrence: " + searchFirst(text10, pattern10));
        System.out.println("Contains pattern: " + contains(text10, pattern10));
        System.out.println("Total count: " + countOccurrences(text10, pattern10));
        System.out.println();

        // Test Case 11: Error handling - null text
        System.out.println("Test Case 11: Error Handling (Null Text)");
        try {
            search(null, "pattern");
            System.out.println("ERROR: Should have thrown exception!");
        } catch (IllegalArgumentException e) {
            System.out.println("Correctly caught exception: " + e.getMessage());
        }
        System.out.println();

        // Test Case 12: Error handling - empty pattern
        System.out.println("Test Case 12: Error Handling (Empty Pattern)");
        try {
            search("text", "");
            System.out.println("ERROR: Should have thrown exception!");
        } catch (IllegalArgumentException e) {
            System.out.println("Correctly caught exception: " + e.getMessage());
        }
        System.out.println();

        // Test Case 13: Pattern longer than text
        System.out.println("Test Case 13: Pattern Longer Than Text");
        String text13 = "SHORT";
        String pattern13 = "VERYLONGPATTERN";
        List<Integer> result13 = search(text13, pattern13);
        printSearchDetails(text13, pattern13, result13);

        System.out.println("=== All tests completed successfully! ===");
    }
}

0 commit comments

Comments
 (0)