n = T.length();
m = P.length();
i0 = 0; // Line P up with the first character of T
i = 0; // Start matching with first char in T
j = 0; // Start matching with first char in P
while ( i < n ) // Not all characters used
{
if ( T[i] == P[j] )
{
/* ===============================================
T[i] and P[j] match ==> try next pair
=============================================== */
i++; // Match next pair
j++;
if ( j == m )
return ( i0 ); // Match found at position i0 !!!
}
else
{ /* ===========================================
T[i] ≠ P[j]:
1. Slide P up 1 position
2. restart from beginning of string
=========================================== */
i0 = i0 + 1; // Slide pattern P one character further
i = i0; // Restart matching at position i0 in T
j = 0; // Restart matching at position 0 in P
}
}
return -1; // Return not found
}
|
|
Answer: Yes
Because:
|
In these examples, note that:
|
|
|
OK, now we must consolidate what we have learned....
We will need some new terminology :)
|
|
|
Examples:
|
|
Reminder:
|
|
Note:
|
prefix = P[ 0..(j-1) ]; // Prefix of pattern at the mismatch
k = MaxOverlap( prefix ); // Compute max overlap
j = k;
i0 = (i - j);
// i is unchanged !
|
1 2
01234567890123456789012 (ruler)
i0=0 i=7
| |
v v
T: abadababaccabacabaabb
P: abadabacb
^
|
j=7
|
(When T[i] ≠ P[j] ):
prefix = P[ 0..(j-1) ]; // Prefix of pattern at the mismatch
k = MaxOverlap( prefix );
j = k;
i0 = (i - j);
|
in the basic (brute-force) algorithm to speed up the process on a mismatch, we obtain the Knuth-Morris-Pratt (KMP) algorithm:
/* ==================================================================
KMP( T, P ): search text T for pattern P (pseudocode).

Returns i0, the position in T where P is lined up when all m
characters of P have matched, or -1 when the end of T is reached
without a complete match.

Differs from the brute-force version only in the mismatch branch
with j > 0: the pattern is re-aligned using MaxOverlap of the
already-matched prefix, and i is not moved back.

Note: prefix and k are used below but are not in the declaration
list; P[ 0..(j-1) ] is range notation, not real syntax.
================================================================== */
KMP( T, P )
{
int i0, i, j, m, n;
n = T.length();
m = P.length();
i0 = 0; // Line P up with the first character of T
i = 0; // Start matching with first char in T
j = 0; // Start matching with first char in P
while ( i < n ) // Not all characters used
{
if ( T[i] == P[j] )
{
i++; // Match next pair
j++;
if ( j == m )
return ( i0 ); // Match found at position i0 !!!
}
else
{ /* ===========================================
T[i] ≠ P[j]
=========================================== */
if ( j == 0 )
{ /* ==============================================
First character already mismatched
We have NO prefix info. to work with...
=============================================== */
i0++; // Just slide P 1 character over
i = i0; // Restart matching at the new position i0 in T
j = 0;
}
else
{
prefix = P[ 0..(j-1) ]; // Prefix of pattern at the mismatch
k = MaxOverlap( prefix );
j = k; // The first k characters of P count as already matched
i0 = (i - j); // P is now lined up with T at position i - j
// i is unchanged !
}
}
}
return -1; // No match found
}
|
We will do an example after discussing the KMP failure function first.....
.....
else
{
prefix = P[ 0..(j-1) ]; // Prefix of pattern at the mismatch
k = MaxOverlap( prefix );
j = k;
i0 = (i - j);
}
.....
|
|
Consequently:
|
|
|
|
Note:
|
Pattern:
Position: 012345
P: abacab
|
Note:
|
/* ==================================================================
KMP( T, P ): search text T for pattern P (pseudocode), using a
precomputed failure function f instead of calling MaxOverlap on
every mismatch.

Returns i0, the position in T where P is lined up when all m
characters of P have matched, or -1 when the end of T is reached
without a complete match.

Note: k is used below but is not in the declaration list, and the
"compute failure function" line is a prose placeholder, not code.
================================================================== */
KMP( T, P )
{
int i0, i, j, m, n;
n = T.length();
m = P.length();
compute failure function f(k) (for all prefixes);
i0 = 0; // Line P up with the first character of T
i = 0; // Start matching with first char in T
j = 0; // Start matching with first char in P
while ( i < n ) // Not all characters used
{
if ( T[i] == P[j] )
{
i++; // Match next pair
j++;
if ( j == m )
return ( i0 ); // Match found at position i0 !!!
}
else
{ /* ===========================================
T[i] ≠ P[j]
=========================================== */
if ( j == 0 )
{
i0++; // Slide 1 character over
i = i0; // Restart matching at the new position i0 in T
j = 0;
}
else
{ // Fast slide using prefix information
k = f(j-1); // = MaxOverlap( P[ 0..(j-1) ] )
// If j=1, f(j-1) = 0 will make pattern P
// slide down 1 character
j = k; // The first k characters of P count as already matched
i0 = (i - j); // P is now lined up with T at position i - j; i is unchanged
}
}
}
return -1; // No match found
}
|
/* ----------------------------------------------------------------
   KMP(T, P): search text T for pattern P.

   Returns i0, the position in T where P is lined up when all m
   characters of P have matched, or -1 when the end of T is
   reached without a complete match.

   f is obtained from KMP_failure_function(P); it is used below as
   f(j-1) on a mismatch after j matched characters.

   Note: i0, i, j, n, m are used without declarations and the
   "f() = ..." line is schematic, so this listing is pseudocode.

   NOTE(review): an empty pattern (m == 0) is not special-cased;
   with a non-empty T the first iteration reads P[0]. Confirm that
   callers never pass an empty P.
   ---------------------------------------------------------------- */
int KMP(String T, String P)
{
f() = KMP_failure_function(P); // Discussed later !
i0 = 0; // Position in T where P[0] is currently lined up
i = 0; // Next character of T to compare
j = 0; // Next character of P to compare
n = T.length();
m = P.length();
while ( i < n )
{
if ( P[j] == T[i] )
{
i++;
j++;
/* ------------------------
Check if we found P
------------------------ */
if ( j == m )
{
return( i0 ); // Found P at i0 in T !
}
}
else
{ /* ---------------------------
Fail to match at P[j]
---------------------------- */
if ( j == 0 )
{ /* -------------------------------------------------
No prefix information ==> slide P up 1 position
------------------------------------------------- */
i0++; // Slide 1 character over
i = i0;
j = 0; // This statement is not necessary (j is already 0 in this branch)
}
else
{ /* -----------------------------------------------------
Use prefix info to perform "fast slide"
----------------------------------------------------- */
int k = f(j-1); // Max Overlap (= length of matching prefix)
j = k; // Restart matching at character
// after matching prefix
i0 = (i-j); // P is now lined up with T at position (i-j)
// i is unchanged !
}
}
}
return(-1); // No match found...
}
|
How to run the program:
|