S = empty;
while ( not EOF )
{
t = read_next_tuple(); // Get next tuple from input
if ( t ∈ S )
{
increase count of the t value in S
}
else if ( random() < 1/T )
{
add (t,1) to S
}
}
|
Input stream:
x x v v x x v x x x v v x x x x v x v v x ...
| | | | | | | |
not picked | | | | | |
(probability 1-1/T) | | | | | |
| | | | | |
First one | | | | |
picked | | | | |
(probability 1/T) | | | | |
v v v v v
c=1 c=2 c=3 c=4 c=5 c=6
subsequent occurrences of the picked value are added with probability 1
|
|
|
|
|
|
|
S = Counting Sample
for ( each entry (v,c) ∈ S ) do
{
for (i = 1; i <= c-1; i++)
{
if ( random() < 1/T )
c = c - 1;
}
}
|
|
|
S = empty;
while ( not EOF )
{
t = read_next_tuple(); // Get next tuple from input
if ( t ∈ S )
{
increase count of the t value in S
}
else if ( random() < 1/T )
{
add (t,1) to S
}
}
|
|
But, we must make sure that:
|
T = 1; // Selection probability = 1/T
// Initial selection probability
S = empty; // S = the counting sample
while ( not EOF )
{
t = read_next_tuple(); // Get next tuple from input
// ---------------------------------------------
// Inserting values in counting sample
// ---------------------------------------------
if ( t ∈ S )
{
increase count of the t value in S
}
else if ( random() < 1/T )
{
add (t,1) to S
}
// ---------------------------------------------------
// Reduce counting sample when size exceeds threshold
// ---------------------------------------------------
if ( size(S) > threshold )
{
T' = α × T; // α > 1
// 1/T' is the new (lower) selection probability
for ( each t ∈ S )
{
// ----------------------------------
// Adjust first element in the count
// ----------------------------------
if ( random() < T/T' )
continue; // success: select remaining
// element with probab. = 1
else
{
t.count--; // fail: delete
// ---------------------------------------
// Adjust remaining elements in the count
// ---------------------------------------
while ( t.count > 0 )
{
if ( random() < 1/T' )
break; // success; add remaining value
// with probab. 1
else
t.count--; // fail: delete
}
if ( t.count == 0 )
delete t from S
} // else
} // for
T = T'; // New selection probability...
}
}
|
|
|
(Ingenious :-))