S = empty;

while ( not EOF )
{
   t = read_next_tuple();      // Get next tuple from input

   if ( t ∈ S )
   {
      increase count of the t value in S
   }
   else if ( random() < 1/T )
   {
      add (t,1) to S
   }
}
Input stream:  x x v v x x v x x x v v x x x x v x v v x ...
                   | |     |       | |         |   | |
            not picked     |       | |         |   | |
     (probability 1-1/T)   |       | |         |   | |
                           |       | |         |   | |
         First one picked  |       | |         |   | |
        (probability 1/T)  |       | |         |   | |
                           |       | |         |   | |
                           v       v v         v   v v
                          c=1   c=2 c=3       c=4 c=5 c=6

After the first value is picked, all subsequent values are added with probability = 1
|
|
|
|
|
|
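S = Counting Sample

for ( each entry (v,c) ∈ S ) do
{
   k = c - 1;                     // Number of occurrences counted after the first one
                                  // (fixed before the loop, because c changes inside it)

   for (i = 1; i <= k; i++)
   {
      if ( random() >= 1/T )      // Each later occurrence is kept with probability 1/T,
         c = c - 1;               // i.e. dropped with probability 1 - 1/T
   }
}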
|
|
S = empty;

while ( not EOF )
{
   t = read_next_tuple();      // Get next tuple from input

   if ( t ∈ S )
   {
      increase count of the t value in S
   }
   else if ( random() < 1/T )
   {
      add (t,1) to S
   }
}
|
But, we must make sure that:
|
T = 1;                         // Initial selection probability
                               // (Selection probability = 1/T)
S = empty;                     // S = the counting sample

while ( not EOF )
{
   t = read_next_tuple();      // Get next tuple from input

   // ---------------------------------------------
   // Inserting values in counting sample
   // ---------------------------------------------
   if ( t ∈ S )
   {
      increase count of the t value in S
   }
   else if ( random() < 1/T )
   {
      add (t,1) to S
   }

   // ---------------------------------------------------
   // Reduce counting sample when size exceeds threshold
   // ---------------------------------------------------
   if ( size(S) > threshold )
   {
      T' = α × T;              // α > 1
                               // 1/T' is the new (smaller) selection probability

      for ( each t ∈ S )
      {
         // ----------------------------------
         // Adjust first element in the count
         // ----------------------------------
         if ( random() < T/T' )
            continue;          // success: keep the first element; the remaining
                               // elements are selected with probability = 1
         else
         {
            t.count--;         // fail: delete the first element

            // ---------------------------------------
            // Adjust remaining elements in the count
            // ---------------------------------------
            while ( t.count > 0 )
            {
               if ( random() < 1/T' )
                  break;       // success: this element becomes the first one; the
                               // remaining values are added with probability 1
               else
                  t.count--;   // fail: delete
            }

            if ( t.count == 0 )
               delete t from S
         } // else
      } // for

      T = T';                  // New selection probability is 1/T'
   }
}
|
|
(Ingenious :-))