The Knuth-Morris-Pratt (KMP) algorithm is a string searching algorithm that efficiently finds occurrences of a pattern string within another text string.
It works by precomputing a "partial match" table (also known as the "failure" or "prefix" function), which helps to avoid redundant comparisons by taking advantage of the information about the pattern itself.
pseudocode:
function computeLPSArray(pattern, M, lps[])
len = 0
lps[0] = 0
i = 1
while i < M
if pattern[i] == pattern[len]
len++
lps[i] = len
i++
else
if len != 0
len = lps[len - 1]
else
lps[i] = 0
i++
function KMPsearch(text, pattern)
N = length of text
M = length of pattern
lps[M]
computeLPSArray(pattern, M, lps)
i = 0
j = 0
while i < N
if pattern[j] == text[i]
i++
j++
if j == M
print "Pattern found at index " + (i - j)
j = lps[j - 1]
else if i < N and pattern[j] != text[i]
if j != 0
j = lps[j - 1]
else
i++
functioncomputeLPSArray(pattern) {let lps = [0];let len =0;let i =1;while (i <pattern.length) {if (pattern[i] === pattern[len]) { len++; lps[i] = len; i++; } else {if (len !==0) { len = lps[len -1]; } else { lps[i] =0; i++; } } }return lps;}functionKMP(text, pattern) {let m =text.length;let n =pattern.length;let lps =computeLPSArray(pattern);let i =0; // Index for text[]let j =0; // Index for pattern[]while (i < m) {if (pattern[j] === text[i]) { i++; j++; }if (j === n) {console.log("Pattern found at index "+ (i - j)); j = lps[j -1]; } elseif (i < m && pattern[j] !== text[i]) {if (j !==0) j = lps[j -1];else i++; } }}// Example usage:let text ="ABCABCDABABCDABCDABDE";let pattern ="ABCDABD";KMP(text, pattern);
In this implementation:
computeLPSArray(pattern) calculates the Longest Prefix Suffix (LPS) array for the given pattern.
KMP(text, pattern) is the main function that searches for occurrences of the pattern in the text using the precomputed LPS array.
For example, if you run this code, it will output:
Pattern found at index 4
Pattern found at index 12
indicating that the pattern "ABCDABD" was found at indices 4 and 12 within the text "ABCABCDABABCDABCDABDE".