MPI_Bcast(void*        buffer,     // Send buffer at rootID, receive buffer everywhere else
          int          count,      // # items in buffer
          MPI_Datatype datatype,
          int          rootID,     // Sending (root) processor
          MPI_Comm     comm)
Pseudo code that describes what happens in an MPI_Bcast():

   if ( myID == rootID )
   {
      for ( every ID i in the communication set "comm", other than rootID )
      {
         MPI_Send( buffer, count, datatype, i, TAG, comm );
      }
   }
   else
   {
      MPI_Recv( buffer, count, datatype, rootID, TAG, comm, &status );
   }
Example 1: source is node 0
Example 2: source is node 1
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <mpi.h>

using namespace std;

int main(int argc, char **argv)
{
   char buff[128];
   int  secret_num;
   int  numprocs;
   int  myid;
   int  i;
   MPI_Status stat;

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   // ------------------------------------------
   // Node 0 obtains the secret number
   // ------------------------------------------
   if ( myid == 0 )
   {
      secret_num = atoi(argv[1]);
   }

   // ------------------------------------------
   // Node 0 shares the secret with everybody
   // ------------------------------------------
   MPI_Bcast(&secret_num, 1, MPI_INT, 0, MPI_COMM_WORLD);

   if ( myid == 0 )
   {
      for ( i = 1; i < numprocs; i++ )
      {
         MPI_Recv(buff, 128, MPI_CHAR, i, 0, MPI_COMM_WORLD, &stat);
         cout << buff << endl;
      }
   }
   else
   {
      sprintf(buff, "Processor %d knows the secret code: %d", myid, secret_num);
      MPI_Send(buff, 128, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
   }

   MPI_Finalize();
}
Demo instruction:
MPI_Scatter(void*        sendbuf,    // Distribute sendbuf evenly to recvbuf
            int          sendcount,  // # items sent to EACH processor
            MPI_Datatype sendtype,
            void*        recvbuf,
            int          recvcount,
            MPI_Datatype recvtype,
            int          rootID,     // Sending processor !
            MPI_Comm     comm)
Rules: the amount of data sent to each processor must equal the amount it receives, i.e. sendcount and sendtype (used by the rootID process) should agree with recvcount and recvtype (used by every receiving process).
However: these rules are not strictly enforced.
(Don't blame MPI for causing "funny errors" if you decide to violate these rules :-))
#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char **argv)
{
   int buff[100];
   int recvbuff[2];
   int numprocs;
   int myid;
   int i, k;
   int mysum;
   MPI_Status stat;

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   if ( myid == 0 )
   {
      cout << "We have " << numprocs << " processors" << endl;

      // -----------------------------------------------
      // Node 0 prepares 2 numbers for each processor
      //    [1][2] [3][4] [5][6] .... etc
      // -----------------------------------------------
      k = 1;
      for ( i = 0; i < 2*numprocs; i += 2 )
      {
         buff[i]   = k++;
         buff[i+1] = k++;
      }
   }

   // ------------------------------------------
   // Node 0 scatters the array to the processors
   // (each processor receives 2 ints in recvbuff !!!)
   // ------------------------------------------
   MPI_Scatter(buff, 2, MPI_INT, recvbuff, 2, MPI_INT, 0, MPI_COMM_WORLD);

   if ( myid == 0 )
   {  // Processor 0
      mysum = recvbuff[0] + recvbuff[1];
      cout << "Processor " << myid << ": sum = " << mysum << endl;

      for ( i = 1; i < numprocs; i++ )
      {
         MPI_Recv(&mysum, 1, MPI_INT, i, 0, MPI_COMM_WORLD, &stat);
         cout << "Processor " << i << ": sum = " << mysum << endl;
      }
   }
   else
   {  // Other processors
      mysum = recvbuff[0] + recvbuff[1];
      MPI_Send(&mysum, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
   }

   MPI_Finalize();
}
Demo instruction:
It does the reverse of MPI_Scatter()....
Illustrated:
MPI_Gather(void*        sendbuf,    // Data sent by EACH processor
           int          sendcount,  // # items sent by EACH processor
           MPI_Datatype sendtype,
           void*        recvbuf,    // Where rootID collects the data
           int          recvcount,  // # items received from EACH processor
           MPI_Datatype recvtype,
           int          rootID,     // Receiving (collecting) processor !
           MPI_Comm     comm)
NOTE: the number of items received in recvbuf of the rootID process will be equal to:

         recvcount × (number of processes in the communication set "comm")
Rules: the amount of data sent by each processor must equal the amount that the rootID process receives from it, i.e. sendcount and sendtype (used by every sending process) should agree with recvcount and recvtype (used by the rootID process).
Again: these rules are not strictly enforced.
(And again, don't blame MPI for causing "funny errors" if you decide to violate these rules....)
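Because recvbuf is only used on the rootID process, a common pattern is to allocate it there at run time with room for numprocs × recvcount items and to pass a null pointer on every other process. Here is a minimal sketch of that pattern (not from the original examples; the variable names myvalue and allvalues are made up for illustration):

   #include <iostream>
   #include <mpi.h>

   using namespace std;

   int main(int argc, char **argv)
   {
      int numprocs, myid;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);

      int myvalue = 100 + myid;                 // one item contributed by each processor

      int *allvalues = NULL;
      if ( myid == 0 )
         allvalues = new int[numprocs];         // root needs room for numprocs x recvcount items

      // recvbuf, recvcount and recvtype are only significant at the rootID process,
      // so the NULL pointer on the other processes is safe
      MPI_Gather(&myvalue, 1, MPI_INT, allvalues, 1, MPI_INT, 0, MPI_COMM_WORLD);

      if ( myid == 0 )
      {
         for ( int i = 0; i < numprocs; i++ )
            cout << "Got " << allvalues[i] << " from processor " << i << endl;
         delete [] allvalues;
      }

      MPI_Finalize();
   }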
Example 1: the "getherer" is node 0
Example 2: the "getherer" is node 1
#include <iostream>
#include <mpi.h>

using namespace std;

int main(int argc, char **argv)
{
   int buff[100];
   int recvbuff[2];
   int numprocs;
   int myid;
   int i, k;
   int mysum;

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);

   if ( myid == 0 )
   {
      cout << "We have " << numprocs << " processors" << endl;

      // -----------------------------------------------
      // Node 0 prepares 2 numbers for each processor
      //    [1][2] [3][4] [5][6] .... etc
      // -----------------------------------------------
      k = 1;
      for ( i = 0; i < 2*numprocs; i += 2 )
      {
         buff[i]   = k++;
         buff[i+1] = k++;
      }
   }

   // ------------------------------------------
   // Node 0 scatters the array to the processors
   // ------------------------------------------
   MPI_Scatter(buff, 2, MPI_INT, recvbuff, 2, MPI_INT, 0, MPI_COMM_WORLD);

   mysum = recvbuff[0] + recvbuff[1];     // Everyone calculates a sum

   // ------------------------------------------
   // Node 0 collects the results in "buff"
   // ------------------------------------------
   MPI_Gather(&mysum, 1, MPI_INT, buff, 1, MPI_INT, 0, MPI_COMM_WORLD);

   // ------------------------------------------
   // Node 0 prints the result
   // ------------------------------------------
   if ( myid == 0 )
   {
      for ( i = 0; i < numprocs; i++ )
         cout << "Processor " << i << ": sum = " << buff[i] << endl;
   }

   MPI_Finalize();
}
Demo instruction:
MPI_Reduce(void*        sendbuf,    // Value(s) contributed by EACH processor
           void*        recvbuf,    // Where rootID receives the combined result
           int          recvcount,  // # items in sendbuf and recvbuf
           MPI_Datatype recvtype,
           MPI_Op       op,         // Reduction operation (see table below)
           int          rootID,     // Processor that gets the result
           MPI_Comm     comm)
NOTE: each item received will be immediately combined (using the operation op) into the variable recvbuf !!!
Rules: every process must pass the same recvcount and recvtype, and the operation op must be compatible with that data type (e.g. MPI_SUM on a numeric type).
Again: these rules are not strictly enforced.
(And again, don't blame MPI for causing "funny errors" if you decide to violate these rules....)
The effect is the same as:
   recvbuf  =  sendbuf(node 0)  op  sendbuf(node 1)  op  ...  op  sendbuf(node numprocs-1)      (the result is available at the rootID process)
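To make that equivalence concrete, here is a minimal sketch (not part of the original example; it assumes MPI_SUM on a single double, and the buffer name partial is made up for illustration) that computes the same result once with MPI_Reduce and once with MPI_Gather followed by a hand-written loop:

   #include <iostream>
   #include <mpi.h>

   using namespace std;

   int main(int argc, char **argv)
   {
      int numprocs, myid;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);

      double myvalue = (double) (myid + 1);     // Each processor contributes one value
      double reduced = 0.0;

      // Method 1: let MPI_Reduce combine the values with MPI_SUM
      MPI_Reduce(&myvalue, &reduced, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

      // Method 2: gather all values at node 0 and combine them "by hand"
      double partial[64];                       // assumes numprocs <= 64 (hypothetical size)
      MPI_Gather(&myvalue, 1, MPI_DOUBLE, partial, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

      if ( myid == 0 )
      {
         double bySum = 0.0;
         for ( int i = 0; i < numprocs; i++ )
            bySum = bySum + partial[i];         // same "op" applied item by item

         cout << "MPI_Reduce result = " << reduced
              << ", gather-and-add result = " << bySum << endl;
      }

      MPI_Finalize();
   }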
MPI Reduction Operation | Effect of the Reduction Operation
------------------------|----------------------------------
MPI_MAX      | Finds the maximum value
MPI_MIN      | Finds the minimum value
MPI_SUM      | Computes the sum of all values
MPI_PROD     | Computes the product of all values
MPI_LAND     | Computes the "logical AND" of all values (0 = false, non-zero = true)
MPI_BAND     | Computes the "bitwise AND" of all values
MPI_LOR      | Computes the "logical OR" of all values (0 = false, non-zero = true)
MPI_BOR      | Computes the "bitwise OR" of all values
MPI_LXOR     | Computes the "logical XOR" of all values (0 = false, non-zero = true)
MPI_BXOR     | Computes the "bitwise XOR" of all values
MPI_MAXLOC   | Finds the maximum value and the processor ID that has that value (you need to pass a structure with these 2 elements: (double value, int rank))
MPI_MINLOC   | Finds the minimum value and the processor ID that has that value (you need to pass a structure with these 2 elements: (double value, int rank))
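For MPI_MAXLOC/MPI_MINLOC, the value and the processor ID travel together as one pair; MPI provides the predefined pair type MPI_DOUBLE_INT for a (double value, int rank) structure. A minimal sketch (not from the original notes; the variable names mine and best are made up for illustration):

   #include <iostream>
   #include <mpi.h>

   using namespace std;

   int main(int argc, char **argv)
   {
      int numprocs, myid;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);

      // The pair that MPI_MAXLOC expects: (double value, int rank)
      struct { double value; int rank; } mine, best;

      mine.value = (double) ((7 * myid + 3) % numprocs);   // some per-processor value
      mine.rank  = myid;                                   // ... tagged with my own ID

      // MPI_DOUBLE_INT is the predefined datatype for a (double, int) pair
      MPI_Reduce(&mine, &best, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);

      if ( myid == 0 )
      {
         cout << "Maximum value = " << best.value
              << " found on processor " << best.rank << endl;
      }

      MPI_Finalize();
   }

The full example below uses the built-in MPI_SUM operation to add up each processor's partial sum for π.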
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <mpi.h>

using namespace std;

int num_procs;                                   // # processors

double f(double a)
{
   return( 2.0 / sqrt(1 - a*a) );
}

/* ======================= MAIN ======================= */
int main(int argc, char *argv[])
{
   int    N;
   double w, x;
   int    i, myid;
   double mypi, final_pi;

   MPI_Init(&argc, &argv);                       // Initialize
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);    // Get # processors
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);         // Get my rank (id)

   if ( myid == 0 )
      N = atoi(argv[1]);

   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

   w = 1.0/(double) N;

   /* ******************************************************************* */
   mypi = 0.0;
   for (i = myid; i < N; i = i + num_procs)
   {
      x = w*(i + 0.5);
      mypi = mypi + w*f(x);
   }
   /* ******************************************************************* */

   MPI_Reduce(&mypi, &final_pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

   if ( myid == 0 )
   {
      cout << "Pi = " << final_pi << endl << endl;
   }

   MPI_Finalize();
}
Demo instruction:
Advanced material below: skipped !!! (Read on by yourself if you're interested)
MPI_Op_create(MPI_User_function *function,   // Your own reduction function
              int                commute,    // 1 = the operation is commutative
              MPI_Op            *op)         // Returns the new (user-defined) operation
void function_name( void *in, void *inout, int *len, MPI_Datatype *datatype);
       // must compute:  inout[i] = in[i] op inout[i]   for i = 0 .. *len-1
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <mpi.h>

using namespace std;

int num_procs;                                   // # processors

void myAdd( void *a, void *b, int *len, MPI_Datatype *datatype)
{
   int i;

   if ( *datatype == MPI_INT )
   {
      int *x = (int *)a;         // Turn the (void *) into an (int *)
      int *y = (int *)b;         // Turn the (void *) into an (int *)

      for (i = 0; i < *len; i++)
      {
         *y = *x + *y;
         x++;
         y++;
      }
   }
   else if ( *datatype == MPI_DOUBLE )
   {
      double *x = (double *)a;   // Turn the (void *) into a (double *)
      double *y = (double *)b;   // Turn the (void *) into a (double *)

      for (i = 0; i < *len; i++)
      {
         *y = *x + *y;
         x++;
         y++;
      }
   }
}

double f(double a)
{
   return( 2.0 / sqrt(1 - a*a) );
}

/* ======================= MAIN ======================= */
int main(int argc, char *argv[])
{
   int    N;
   double w, x;
   int    i, myid;
   double mypi, final_pi;
   MPI_Op myOp;

   MPI_Init(&argc, &argv);                       // Initialize
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);    // Get # processors
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);         // Get my rank (id)

   if ( myid == 0 )
      N = atoi(argv[1]);

   MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

   w = 1.0/(double) N;

   /* ******************************************************************* */
   mypi = 0.0;
   for (i = myid; i < N; i = i + num_procs)
   {
      x = w*(i + 0.5);
      mypi = mypi + w*f(x);
   }
   /* ******************************************************************* */

   MPI_Op_create( myAdd, 1, &myOp );             // 1 = myAdd is commutative
   MPI_Reduce(&mypi, &final_pi, 1, MPI_DOUBLE, myOp, 0, MPI_COMM_WORLD);

   if ( myid == 0 )
   {
      cout << "Pi = " << final_pi << endl << endl;
   }

   MPI_Finalize();
}
Demo instruction:
MPI_Barrier( MPI_Comm comm )
Effect:
A process that calls MPI_Barrier() waits (blocks) until every process in the communication set "comm" has also called MPI_Barrier(); only then do all of them continue.
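A minimal sketch of a typical use, synchronizing all processes before and after a timed phase (not from the original notes; the loop is just a stand-in for real work, and the timing uses MPI_Wtime()):

   #include <iostream>
   #include <mpi.h>

   using namespace std;

   int main(int argc, char **argv)
   {
      int numprocs, myid;

      MPI_Init(&argc, &argv);
      MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
      MPI_Comm_rank(MPI_COMM_WORLD, &myid);

      // Wait until *every* processor reaches this point...
      MPI_Barrier(MPI_COMM_WORLD);

      // ... so that all of them start the timed phase together
      double start = MPI_Wtime();

      double x = 0.0;
      for ( int i = 0; i < 1000000; i++ )       // stand-in for real work
         x = x + 0.5 * i;

      MPI_Barrier(MPI_COMM_WORLD);              // wait for the slowest processor
      double elapsed = MPI_Wtime() - start;

      if ( myid == 0 )
         cout << "Timed phase took " << elapsed << " seconds"
              << " (result " << x << ")" << endl;

      MPI_Finalize();
   }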