/**
 * Finds the first occurrence of pattern P in text T with the
 * Knuth-Morris-Pratt algorithm.
 *
 * Variables used while scanning:
 *   i  = index in T of the character currently being compared
 *   j  = index in P of the character currently being compared
 *   i0 = index in T aligned with P[0]  (invariant: i0 == i - j)
 *
 * @param T the text to search in
 * @param P the pattern to search for
 * @return the index in T of the first occurrence of P, or -1 if P does not
 *         occur in T; an empty pattern is reported at index 0 (the same
 *         convention as String.indexOf)
 */
int KMP(String T, String P)
{
   int n = T.length();
   int m = P.length();

   /* ---------------------------------------------------------
      An empty pattern has no character P[0] to compare, so it
      is handled up front instead of entering the loop.
      --------------------------------------------------------- */
   if ( m == 0 )
   {
      return( 0 );
   }

   int[] f = kmpOverlapTable(P);   // f[k] = max overlap for P[0..k]

   int i0 = 0;                     // P is aligned at position i0 in T
   int i  = 0;
   int j  = 0;

   while ( i < n )
   {
      if ( P.charAt(j) == T.charAt(i) )
      {
         i++;
         j++;

         /* ------------------------
            Check if we found P
            ------------------------ */
         if ( j == m )
         {
            return( i0 );          // Found P at i0 in T !
         }
      }
      else if ( j == 0 )
      {
         /* -------------------------------------------------
            No prefix information ==> slide P up 1 position
            (j is already 0, so only i0 and i move)
            ------------------------------------------------- */
         i0++;
         i = i0;
      }
      else
      {
         /* -----------------------------------------------------
            Use prefix info to perform "fast slide":
            P[0..j-1] matched, so restart right after the longest
            prefix of P that is also a suffix of P[0..j-1].
            ----------------------------------------------------- */
         j  = f[j-1];              // Max overlap (= length of matching prefix)
         i0 = i - j;               // Shift pattern; i is unchanged !
      }
   }

   return( -1 );                   // No match found...
}

/**
 * Builds the overlap (failure) table used by KMP: f[k] is the length of the
 * longest proper prefix of P[0..k] that is also a suffix of P[0..k].
 * Kept private so that KMP does not depend on any other definition.
 */
private int[] kmpOverlapTable(String P)
{
   int m = P.length();
   int[] f = new int[m];           // f[0] == 0 by default initialization

   int i = 1;                      // Next entry of f[] to compute
   int j = 0;                      // Length of the overlap currently being extended

   while ( i < m )
   {
      if ( P.charAt(j) == P.charAt(i) )
      {
         f[i] = j + 1;             // Overlap grows by one character
         i++;
         j++;
      }
      else if ( j == 0 )
      {
         f[i] = 0;                 // No overlap possible at position i
         i++;
      }
      else
      {
         j = f[j-1];               // Fall back to the next shorter overlap
      }
   }

   return( f );
}
|
|
Proof:
|
0123456789012345678901234567801 (ruler)
i
|
v
T: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
P: yyyyyyy
^ ^
| |
| j
0123456 (ruler)
|
i0 = i - j
|
is the index of the first letter in text T aligned against the pattern.
// Main matching loop of KMP, repeated here for the running-time argument.
// Assumes i, j, i0, n, m and the table f[] were set up before the loop
// (they are not declared in this excerpt).
// Every branch below keeps the relation i0 = i - j intact, provided it
// holds on entry.
while ( i < n )
{
if ( P.charAt(j) == T.charAt(i) )
{
// Match: i and j advance together, so i0 = i - j does not change
i++;
j++;
if ( j == m ) // Check if pattern is complete
return( i0 ); // FOUND !!
}
else
{
// Mismatch: both sub-cases below move i0 forward
if ( j == 0 )
{
// Nothing matched yet: move the pattern (and i with it) one position
i0++; // i0 increased by 1
i = i0;
j = 0;
}
else
{
// Part of the pattern matched: fall back using the table f[]
j = f[j-1]; // Note: f[j-1] < j
i0 = (i-j); // i0 will increase because j was decreased
// i is unchanged
}
// Prints a separator line each time a mismatch has been handled
System.out.println("====================");
}
}
|
|
Summary:
|
|
Proof:
0123456789012345678901234567801 (ruler)
i
|
v
T: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
P: yyyyyyy
^ ^
| |
| j
0123456 (ruler)
|
i0 = i - j
<------------------------------>
range(i) = n characters in T
range(i0) = n characters in T
|
|
|
the maximum number of times that the loop can ever be executed is:
# iterations ≤ 2 × n
|
(Every iteration increases i or i0 by at least 1, and neither can grow beyond n. Otherwise, we would increase one of the variables i or i0 by more than n !!!)
|
/**
 * Computes the KMP failure (overlap) table for pattern P.
 *
 * For 0 <= k < P.length(), f[k] is the length of the longest proper prefix
 * of P[0..k] that is also a suffix of P[0..k].
 *
 * @param P the pattern
 * @return an array of P.length() + 1 entries; entries 0 .. P.length()-1 hold
 *         the table, the last entry is never written and stays 0 (the spare
 *         slot keeps f[0] valid for an empty pattern)
 */
int[] KMP_failure_function(String P)
{
   int i, j, m;
   int[] f = new int[P.length() + 1];

   m = P.length();

   f[0] = 0;   // A single character has no proper prefix ==> overlap 0

   i = 1;      // Next value of f[] to compute
   j = 0;      // Length of the overlap currently being extended

   /* ---------------------------------------
      We are at this situation:

            p1              (i=1)
         p0 p1 .... pm-1    (j=0)
      --------------------------------------- */
   while ( i < m )
   {
      if ( P.charAt(j) == P.charAt(i) )
      {
         /* ---------------------------------------------------
            Case 1: P[0..j] matches the j+1 characters ending
                    at P[i]  ==>  overlap length is j+1
            --------------------------------------------------- */
         f[i] = j + 1;

         /* ------------------------
            Next position...
            ------------------------ */
         i++;
         j++;
      }
      else
      {
         /* ---------------------------
            Case 2: does not match
            ---------------------------- */
         if ( j == 0 )
         {
            /* -------------------------------------------------
               No match possible
               ------------------------------------------------- */
            f[i] = 0;
            i++;
         }
         else
         {
            /* -----------------------------------------------------
               There is a prefix matched ==> go to the max matching
               suffix of the prefix and CONTINUE the search
               (i is unchanged, only j falls back)
               ----------------------------------------------------- */
            j = f[j-1];
         }
      }
   }

   return( f );
}
|
# times the while loop can be executed ≤ 2 × m
|
(Otherwise, we would increase one of the quantities i or (i - j) by more than m !!!)
|
|