// Bernoulli sampling: every input tuple is kept independently with
// probability f.
// NOTE: on average the algorithm selects f * (number of input tuples) tuples.
N = 0;                          // N = number of tuples selected so far
                                //     (= next free slot in Sample)

while ( not EOF )
{
   t = read_next_tuple();       // Get next tuple from input

   // Coin toss: random() returns a random number in (0..1),
   // so the toss succeeds with probability f.
   selected = ( random() < f );

   if ( selected )
   {
      Sample[N] = t;            // Store the tuple in the sample
      N++;
   }
}
|
|
|
|
// Concise sampling with a fixed selection probability f.
// S stores (value, count) pairs: a value that is selected several times
// occupies one entry with a counter instead of several entries.
S = empty;

while ( not EOF )
{
   t = read_next_tuple();       // Get next tuple from input

   // Coin toss: random() returns a random number in (0..1)
   if ( random() >= f )
      continue;                 // Toss failed: t is not sampled

   // Toss succeeded: record one more occurrence of t in the sample
   if ( t ∈ S )
   {
      increase count of the t value in S
   }
   else
   {
      add (t,1) to S
   }
}
|
Example: We want to keep the sample size <= 4
|
|
// Concise sampling with an adaptive selection probability.
//   T       : threshold; each input value is selected with probability 1/T
//   S       : concise sample, a set of (value, count) pairs
//   MaxSize : upper bound on size(S)
//   α       : factor by which the threshold is raised; assumed α > 1,
//             otherwise the deletion step never evicts anything
T = 1;          // 1/T = selection probability
S = empty;      // S = Concise Sample

while ( not EOF )
{
   t = next input value;

   if ( random() < 1/T )
   {
      if ( t ∈ S )
      {
         increase count of the t value in S
      }
      else
      {
         add (t,1) to S
      }
   }

   /* -------------------------------------------
      Deletion step:
      Adjust sample when it gets too large...

      Repeated until the sample fits: a single pass evicts sample
      points only with some probability, so it may not shrink S.
      ------------------------------------------- */
   while ( size(S) > MaxSize )
   {
      T' = α × T;     // New threshold  =>  new selection probability 1/T'

      for ( each entry s ∈ S ) do
      {
         // Snapshot the count first: the loop must toss one coin for every
         // sample point that s represents BEFORE eviction. Using s.count
         // directly as the loop bound would shrink the bound on every
         // eviction and skip some of the tosses.
         n = s.count;

         for ( i = 1; i <= n; i++ )
         {
            // Each sample point survives with probability T/T',
            // i.e. it is evicted with probability 1 - T/T'
            if ( random() < (1 - T/T') )
               s.count--;
         }

         if ( s.count == 0 )
            delete s from S;
      }

      T = T';
   }
}
![]() |
|
|
(Ingenious :-))