|
This algorithm makes use of the Apriori-property to reduce the number of candidate frequent item sets:
|
|
|
Update(Transaction t, Lattice L, int i)
{
for ( each i-subset S ⊆ t ) do
{
if ( S ∈ Li )
{
/* ----------------------------
Item set S is in Li
---------------------------- */
S.count++; // Tally itemset
}
else
{
/* ---------------------------------
Item set S is not in Li
--------------------------------- */
if ( i ≤ 2 )
{
/* ------------------------------------
"Commonly found" item sets, insert
------------------------------------- */
Li.insert(S); // Always insert 1 and 2-itemsets
}
else
{
/* ---------------------------------------
Larger item sets...
Use the Apriori property to save space
--------------------------------------- */
if ( every (i-1)-subset U of S ∈ Li-1 )
{
Li.insert(S); // All (i-1)-subsets of S are frequent, so S may be frequent too
}
}
}
}
}
|
Comments
|
CrossOver(Lattice L, int i)
{
for ( each i-itemset S ∈ Li ) do
{
S.count--;
if ( S.count == 0 )
{
Li.delete(S); // remove completely
}
}
}
|
Comments:
|
*** Note: parameters N and X are passed by reference !
int NumberOfTwoItemsetsPerTransaction(int N, int X, Transaction t)
{
N++;
X = X + ( |t| × (|t|-1) / 2 ); // = # 2-item sets in t
return ( X/N );
}
|
Comment:
|
|
L = ∅; // L = all frequent itemsets
T = ∅; // T = unprocessed transactions (buffer)
N = 0; // N = number of transactions
X = 0; // X = number of 2-itemsets in transactions
c = 0; // Counts the number of times that "CrossOver" (delete) operation was performed
while ( not EOS(D) )
{
t = next transaction (set of items) in D;
T = T ∪ t;
/* ----------------------------------------------------------
Always update the 1-item and 2-items frequent item sets
---------------------------------------------------------- */
Update(t, L, 1); // insert t{1} in L
Update(t, L, 2); // insert t{2} in L
/* ----------------------------------------------------------
Processing of 3-items and higher-items frequent item sets
are delayed until L2 reaches a certain threshold
---------------------------------------------------------- */
f = NumberOfTwoItemsetsPerTransaction(N, X, t);
if ( | L2 | ≥ 1/(εθ) * f )
{
/* ------------------------------------------
First: Delete infrequent sets in L2
------------------------------------------ */
CrossOver(L, 2);
/* -------------------------------------------------------
Next: (delayed) process transactions into L3, L4, ...
------------------------------------------------------- */
i = 2;
while ( Li != ∅ )
{
i++;
/* -----------------------
Insert phase
----------------------- */
for ( each t ∈ T ) do
{
Update(t, L, i);
}
/* -----------------------
Delete phase
----------------------- */
CrossOver(L, i);
}
c++; // One more deletion completed...
T = ∅; // Re-initialize....
}
}
/* --------------------------------------------------------------
Post processing (after the end of the stream D):
performs "CrossOver" (delete operation) until a total of
θ × |D| have been performed, to remove the non-frequent item sets
-------------------------------------------------------------- */
while ( c < θ × |D| )
{
c++;
for ( each level i of L )
{
if ( Li != ∅ )
CrossOver(L, i);
}
}
|
|
|
|
|
|