int KMP(String T, String P) { f() = KMP_failure_function(P); i0 = 0; i = 0; j = 0; n = T.length(); m = P.length(); while ( i < n ) { if ( P[j] == T[i] ) { i++; j++; /* ------------------------ Check if we found P ------------------------ */ if ( j == m ) { return( i0 ); // Found P at i0 in T ! } } else { /* --------------------------- Fail to match at P[j] ---------------------------- */ if ( j == 0 ) { /* ------------------------------------------------- No prefix information ==> slide P up 1 position ------------------------------------------------- */ i0++; // Slide 1 character over i = i0; j = 0; // This statement is not necessary... } else { /* ----------------------------------------------------- Use prefix info to perform "fast slide" ----------------------------------------------------- */ int k = f(j-1); // Max Overlap (= length of matching prefix) j = k; // Restart matching at character // after matching prefix i0 = (i-j); // Shift pattern (i-j) characters // i is unchanged ! } } } return(-1); // No match found... } |
|
Proof:
|
   0123456789012345678901234567890   (ruler)
                 i
                 |
                 v
T: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
P:          yyyyyyy
            ^    ^
            |    |
            |    j
            0123456   (ruler)
            |
            i0

Here i0 = i - j is the index of the first letter in text T that is aligned against the pattern P.
// Main loop of KMP, annotated for the running-time argument:
// every pass either advances i (match) or advances i0 (mismatch).
while (i < n) {
    if (P.charAt(j) != T.charAt(i)) {
        // Mismatch at P[j]
        if (j != 0) {
            j = f[j-1];        // Note: f[j-1] < j
            i0 = (i-j);        // i0 will increase because j was decreased
                               // i is unchanged
        } else {
            i0++;              // i0 increased by 1
            i = i0;
            j = 0;
        }
        System.out.println("====================");
        continue;
    }

    // Match: move on to the next character of both T and P
    i++;
    j++;
    if (j == m) {              // Check if pattern is complete
        return i0;             // FOUND !!
    }
}
|
Summary:
|
|
Proof:
   0123456789012345678901234567890   (ruler)
                 i
                 |
                 v
T: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
P:          yyyyyyy
            ^    ^
            |    |
            |    j
            0123456   (ruler)
            |
            i0 = i - j

   <----------------------------->
   range(i)  = n characters in T
   range(i0) = n characters in T
|
|
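Every iteration of the loop increases either i or i0, and each of them can take at most n different values, so the maximum number of times that the loop can ever be executed is:
# iterations ≤ 2 × n
(Otherwise, we would have increased one of the variables i or i0 more than n times!)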
|
int KMP_failure_function(String P) { int i, j, m; int f[] = new int[P.length() + 1]; m = P.length(); f[0] = 0; // See KMP alg. for explanation (click here) i = 1; // Next value of f[] to compute j = 0; /* --------------------------------------- We are at this situation: p1 (i=1) p0 p1 .... pm-1 (j=0) --------------------------------------- */ while ( i < m ) { if ( P[j] == P[i] ) { /* ------------------------ Case 1: ------------------------ */ f[i] = j; // ******* Case 1 /* ------------------------ Next position... ------------------------ */ i++; j++; } else { /* --------------------------- Case 2: does not match ---------------------------- */ if ( j == 0 ) { /* ------------------------------------------------- No match possible ------------------------------------------------- */ f[i] = 0; // ********* i++; } else { /* ----------------------------------------------------- There is a prefix matched ==> go to the max matching suffix of the prefix and CONTINUE the search ----------------------------------------------------- */ j = f(j-1); // Jump and continue search... } } } } |
# times the while loop can be executed ≤ 2 × m
(Every iteration increases either i or (i − j), and neither can exceed m; otherwise we would have increased one of them more than m times!)
|
|