|
DEMO: demo/OpenMP/compute-pi.c
|
This is the single-threaded Rectangle-Rule algorithm:
/*
 * Single-threaded Rectangle-Rule (midpoint) approximation of pi.
 * Sums w*f(x_i) over N rectangles of width w = 1/N, where x_i is the
 * midpoint of rectangle i.  (f() is defined elsewhere in the demo.)
 * FIX: the original snippet was a copy of the OpenMP version; the
 * parallel region contradicted the "single-threaded" description.
 */
int main(int argc, char *argv[]) {
    int N = ....;                 // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;           // Width of each rectangle
    sum = 0.0;

    double x;
    for (int i = 0; i < N; i = i + 1) {
        x = w*(i + 0.5);          // Midpoint of rectangle i
        sum = sum + w*f(x);       // Accumulate rectangle area
    }

    printf("\nPi = %lf\n\n", sum);
}
We create a parallel region to execute the loop:
/*
 * Rectangle-Rule pi computation inside an OpenMP parallel region.
 * NOTE(review): two deliberate issues at this teaching step —
 *  (1) every thread runs the FULL loop (work is not yet divided), and
 *  (2) the unsynchronized update of the shared variable `sum` is a
 *      data race.  Both are fixed in the following refinements.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared among all threads

    #pragma omp parallel                      // Executed by a team of threads
    {
        int id = omp_get_thread_num();        // This thread's ID (not used yet)
        int nThreads = omp_get_num_threads(); // Team size (not used yet)
        double x;

        for (int i = 0; i < N; i = i + 1) {   // Every thread does ALL N iterations
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            sum = sum + w*f(x);               // DATA RACE: unsynchronized update
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
Find the thread's ID and the total # of threads (needed to distribute the work load):
/*
 * Rectangle-Rule pi computation: each thread now queries its ID and the
 * team size, in preparation for dividing the loop iterations.
 * NOTE(review): the work is still NOT distributed (every thread runs all
 * N iterations) and the update of `sum` is still a data race — both are
 * addressed in the next steps.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared among all threads

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double x;

        for (int i = 0; i < N; i = i + 1) {   // Still ALL iterations per thread
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            sum = sum + w*f(x);               // DATA RACE: unsynchronized update
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
Assign/distribute the work load to each thread:
/*
 * Rectangle-Rule pi computation with the iterations distributed
 * cyclically: thread `id` handles i = id, id+nThreads, id+2*nThreads, ...
 * NOTE(review): the unsynchronized update of the shared variable `sum`
 * is still a data race — synchronization is added in the next step.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared among all threads

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double x;

        // Cyclic (round-robin) distribution of the N iterations
        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            sum = sum + w*f(x);               // DATA RACE: unsynchronized update
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
DEMO: demo/OpenMP/openMP-compute-pi1.c --- has synchronization errors
Updates to the shared variable sum must be synchronized:
/*
 * Rectangle-Rule pi computation, work distributed cyclically among the
 * threads.  This version highlights the remaining defect: the update of
 * the shared variable `sum` is unsynchronized — a data race that must be
 * protected (e.g., with a lock), as done in the next refinement.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            sum = sum + w*f(x);               // Update to shared variable (RACE)
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
Create and initialize an OpenMP lock variable sumLock:
/*
 * Rectangle-Rule pi computation: an OpenMP lock `sumLock` is created and
 * initialized here, but deliberately NOT yet used inside the loop — the
 * update of the shared variable `sum` is still a data race at this step.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    omp_lock_t sumLock;                       // Create lock variable
    omp_init_lock(&sumLock);                  // Initialize lock variable

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            sum = sum + w*f(x);               // Update to shared variable (RACE:
                                              // lock is not yet taken here)
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
Use the sumLock variable to synchronize the updates:
/*
 * Rectangle-Rule pi computation: iterations distributed cyclically and
 * every update of the shared variable `sum` serialized with an OpenMP
 * lock.  Correct, but locking inside the loop is a synchronization
 * bottleneck (each of the N iterations acquires the mutex).
 * FIX: added the missing omp_destroy_lock() — an initialized lock must
 * be destroyed to release its resources.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    omp_lock_t sumLock;                       // Create lock variable
    omp_init_lock(&sumLock);                  // Initialize lock variable

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            omp_set_lock(&sumLock);           // Lock the mutex
            sum = sum + w*f(x);               // Update to shared variable
            omp_unset_lock(&sumLock);         // Unlock the mutex
        }
    }
    omp_destroy_lock(&sumLock);               // Release the lock's resources

    printf("\nPi = %lf\n\n", sum);
}
DEMO: demo/OpenMP/openMP-compute-pi2.c --- has a synchronization bottleneck
Use a private variable mySum to avoid the synchronization bottleneck:
/*
 * Rectangle-Rule pi computation without the per-iteration lock
 * bottleneck: each thread accumulates into a PRIVATE `mySum`, and the
 * lock is taken only once per thread to fold `mySum` into the shared
 * `sum`.
 * FIX: added the missing omp_destroy_lock() — an initialized lock must
 * be destroyed to release its resources.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    omp_lock_t sumLock;                       // Create lock variable
    omp_init_lock(&sumLock);                  // Initialize lock variable

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double mySum = 0;                     // Private per-thread partial sum
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            mySum = mySum + w*f(x);           // Update to private variable
        }

        omp_set_lock(&sumLock);               // Lock the mutex (once per thread)
        sum = sum + mySum;                    // Update to shared variable
        omp_unset_lock(&sumLock);             // Unlock the mutex
    }
    omp_destroy_lock(&sumLock);               // Release the lock's resources

    printf("\nPi = %lf\n\n", sum);
}
DEMO: demo/OpenMP/openMP-compute-pi3.c
Alternate way to synchronize updates in OpenMP: the critical region
/*
 * Rectangle-Rule pi computation sketching the critical-region approach:
 * the words "critical region" below are PLACEHOLDER PSEUDOCODE (not
 * valid C) marking where a critical construct protects the shared
 * update; the real OpenMP syntax replaces them in the final version.
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double mySum = 0;                     // Private per-thread partial sum
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            mySum = mySum + w*f(x);           // Update to private variable
        }

        critical region                       // <-- placeholder, not valid C
        sum = sum + mySum;                    // Update to shared variable
        critical region                       // <-- placeholder, not valid C
    }

    printf("\nPi = %lf\n\n", sum);
}
Statements inside a critical region are executed by one (1) thread only:
/*
 * Rectangle-Rule pi computation using an OpenMP critical region: the
 * statements inside the critical construct are executed by one thread
 * at a time, serializing the single per-thread update of `sum`.
 * FIX: the directive was "#pragma critical" (missing "omp"), which is
 * an unknown pragma that compilers silently ignore — so the update ran
 * UNPROTECTED.  The correct directive is "#pragma omp critical".
 */
int main(int argc, char *argv[]) {
    int N = ....;                             // Assume N is initialized
    double sum;
    double w;

    w = 1.0/(double) N;                       // Width of each rectangle
    sum = 0.0;                                // Shared variable among the threads

    #pragma omp parallel
    {
        int id = omp_get_thread_num();        // Thread's ID
        int nThreads = omp_get_num_threads(); // # threads
        double mySum = 0;                     // Private per-thread partial sum
        double x;

        for (int i = id; i < N; i = i + nThreads) {
            x = w*(i + 0.5);                  // Midpoint of rectangle i
            mySum = mySum + w*f(x);           // Update to private variable
        }

        #pragma omp critical                  // One thread at a time past here
        {
            sum = sum + mySum;                // Update to shared variable
        }
    }

    printf("\nPi = %lf\n\n", sum);
}
DEMO: demo/OpenMP/openMP-compute-pi4.c