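N = 0; // N = number of tuples selected into Sample so far
// NOTE: on average the algorithm selects f × (number of input tuples) tuples,
// where f (0 < f < 1) is the per-tuple selection probability.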
while ( not EOF )
{
t = read_next_tuple(); // Get next tuple from input
// Select the tuple if the coin toss succeeds (this happens with probability f)...
// random() returns a uniformly distributed random number in (0..1)
if ( random() < f )
{
Sample[N] = t;
N = N+1;
}
}
|
|
|
|
|
S = empty;
while ( not EOF )
{
t = read_next_tuple(); // Get next tuple from input
// Select the tuple if the coin toss succeeds (this happens with probability f)...
// random() returns a uniformly distributed random number in (0..1)
if ( random() < f )
{
if ( t ∈ S )
{
increase count of the t value in S
}
else
{
add (t,1) to S
}
}
}
|
|
Example: We want to keep the sample size <= 4
|
|
T = 1; // 1/T = selection probability
S = empty; // S = Concise Sample
while ( not EOF )
{
t = next input value;
if ( random() < 1/T )
{
if ( t ∈ S )
{
increase count of the t value in S
}
else
{
add (t,1) to S
}
}
/* -------------------------------------------
Deletion step:
Adjust sample when it gets too large...
------------------------------------------- */
if ( size(S) > MaxSize )
{
T' = α × T; // New threshold (α > 1); new selection probability = 1/T'
for ( each sample t ∈ S ) do
{
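n = t.count; // Fix the number of trials first: t.count changes inside the loop
for ( i = 1; i <= n; i++ )
{
// Each sampled occurrence is kept with probability T/T',
// i.e., evicted with probability 1 - T/T'
if ( random() < (1 - T/T') )
t.count--;
}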
if ( t.count == 0 )
delete t from S;
}
T = T';
}
}
|
|
|
|
(Ingenious :-))