/* -----------------------------------------------------
   Lossy Counting for single items

   Inputs:
     stream - the sequence of items to process
     s      - support threshold (fraction of N), used in the output phase
     ε      - error bound (fraction of N), with ε < s

   D holds entries (item, f, Δ):
     f - occurrences counted since the entry was inserted
     Δ - maximum number of occurrences that may have been
         missed before the entry was inserted
   ----------------------------------------------------- */

/* -----------------------------------------------------
   Initialization:
   ----------------------------------------------------- */
D = empty;       // Empty list...
w = ⌈1/ε⌉;       // Bucket width: number of items per bucket
bcurrent = 1;    // First current bucket
N = 0;           // Number of items processed

/* ---------------------------------------------------
   Main processing loop
   --------------------------------------------------- */
while ( not end of stream ) do
{
    x = next item in stream;
    N = N + 1;   // One more item processed

    /* --------------------------------
       Insert phase
       -------------------------------- */
    if ( x ∈ D )
    {
        fx++;    // Increase its count
    }
    else
    {
        insert (x, 1, bcurrent-1) into D;
        // Add x to D with frequency count = 1
        // The maximum error Δ is set to (bcurrent - 1):
        // x may have been seen (and dropped) at most once
        // per cleanup in each of the earlier buckets
    }

    /* -----------------------------------------------------------
       Delete phase: Space Reduction step...
       Note: this step is executed once every w insertions
             I.e., when one bucket fills up !
       ----------------------------------------------------------- */
    if ( N mod w == 0 )
    {   // Bucket boundary reached, cleanup the infrequent items !!
        for ( each element i ∈ D ) do
        {
            // fi + Δi is an upper bound on the true count of i
            if ( fi + Δi ≤ bcurrent )
                delete (i, fi, Δi) from D;
        }
        bcurrent++;   // Start a new bucket...
    }
}

/* ---------------------------------------------------
   Output phase
   Report every item whose counted frequency reaches
   (s - ε) × N; the ε slack compensates for the
   occurrences that fi may have missed.
   --------------------------------------------------- */
for ( each element i ∈ D ) do
{
    if ( fi ≥ (s - ε) × N )
    {
        Print i, fi
    }
}
|
|
Here, s denotes a set of items (an itemset).
/* -----------------------------------------------------
   Lossy Counting for item SETS

   Inputs:
     stream - the sequence of item sets (transactions) to process
     σ      - support threshold (fraction of N); this is the
              threshold called s in the single-item version,
              renamed here because s denotes an item set below
     ε      - error bound (fraction of N), with ε < σ

   D holds entries (s, f, Δ):
     s - a set of items
     f - occurrences counted since the entry was inserted
     Δ - maximum number of occurrences that may have been
         missed before the entry was inserted
   ----------------------------------------------------- */
D = empty;       // Empty list...
w = ⌈1/ε⌉;       // Bucket width: number of item SETS per bucket
bcurrent = 1;    // First current bucket
N = 0;           // Number of item SETS processed

while ( NOT EOF ) do
{
    x = next item SET in stream;
    N = N + 1;   // One more item SET processed

    /* ---------------------------------------
       Insert step: insert all subsets of x !!
       (the empty subset is skipped: it occurs in
        every item set and carries no information)
       Note: x has 2^|x| - 1 non-empty subsets
       --------------------------------------- */
    for ( each non-empty subset s ⊆ x ) do
    {
        if ( s ∈ D )
        {
            fs++;    // Increase its count
        }
        else
        {
            insert (s, 1, bcurrent-1) into D;
            // Add s to D with frequency count = 1
            // The maximum error Δ is set to (bcurrent - 1)
        }
    }

    /* ----------------------------------------
       Delete step... - unchanged
       Executed once every w item SETS,
       i.e., when one bucket fills up
       ---------------------------------------- */
    if ( N mod w == 0 )
    {   // Bucket boundary reached, cleanup the infrequent items !!
        for ( each (si, fi, Δi) ∈ D ) do
        {
            // fi + Δi is an upper bound on the true count of si
            if ( fi + Δi ≤ bcurrent )
                delete (si, fi, Δi) from D;
        }
        bcurrent++;   // Next bucket...
    }
}

/* ----------------------------------------
   Output phase
   ---------------------------------------- */
Output all entries (si, fi, Δi) ∈ D with fi ≥ (σ - ε) × N
|