|
DEMO: demo/OpenMP/compute-pi.c
|
This is the single-threaded Rectangle-Rule algorithm:
/*
 * Single-threaded Rectangle-Rule (midpoint) approximation of pi.
 * Integrates f(x) over [0,1] using N rectangles of width w = 1/N,
 * sampling f at the midpoint of each rectangle.
 * (The OpenMP pragma and thread-ID queries that crept into this slide
 * belong to the later parallel versions; a serial baseline needs none.)
 */
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulates the rectangle areas (the pi approximation)
double w;     // Width of each rectangle: 1/N
w = 1.0/(double) N;
sum = 0.0;
double x;     // Midpoint of the current rectangle
for (int i = 0; i < N; i = i + 1 )
{
x = w*(i + 0.5);      // Midpoint of the i-th subinterval
sum = sum + w*f(x);   // Add the area of the i-th rectangle
}
printf("\nPi = %lf\n\n", sum);
}
|
We create a parallel region to execute the loop:
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Shared accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0;
#pragma omp parallel  // NOTE(teaching step): the loop is NOT yet divided among threads
{
int id = omp_get_thread_num();
int nThreads = omp_get_num_threads() ;
double x;
for (int i = 0; i < N; i = i + 1 )  // Every thread redundantly computes ALL N rectangles
{
x = w*(i + 0.5);
sum = sum + w*f(x);  // DATA RACE: unsynchronized read-modify-write of shared sum
}
}
printf("\nPi = %lf\n\n", sum);
}
|
Find the thread's ID and the number of threads in the team (needed to distribute the workload):
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Shared accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0;
#pragma omp parallel  // id/nThreads are queried but not yet used to split the loop
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads(); // # threads
double x;
for (int i = 0; i < N; i = i + 1 )  // Still runs the FULL loop in every thread
{
x = w*(i + 0.5);
sum = sum + w*f(x);  // DATA RACE: unsynchronized update of shared sum
}
}
printf("\nPi = %lf\n\n", sum);
}
|
Assign/distribute the work load to each thread:
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Shared accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0;
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double x;
for (int i = id; i < N; i = i + nThreads)  // Cyclic distribution: thread id handles i = id, id+nThreads, ...
{
x = w*(i + 0.5);
sum = sum + w*f(x);  // DATA RACE remains: shared sum is updated without synchronization
}
}
printf("\nPi = %lf\n\n", sum);
}
|
DEMO: demo/OpenMP/openMP-compute-pi1.c --- has synchronization errors
Updates to the shared variable sum must be synchronized:
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double x;
for (int i = id; i < N; i = i + nThreads)  // Cyclic work distribution across the team
{
x = w*(i + 0.5);
sum = sum + w*f(x); // Update to shared variable -- RACE: read-modify-write is not atomic
}
}
printf("\nPi = %lf\n\n", sum);  // Result is nondeterministic because of the race above
}
|
Create and initialize an OpenMP lock variable sumLock:
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
omp_lock_t sumLock; // Create lock variable
omp_init_lock(&sumLock); // Initialize lock variable
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double x;
for (int i = id; i < N; i = i + nThreads)
{
x = w*(i + 0.5);
sum = sum + w*f(x); // Update to shared variable -- lock exists but is NOT used yet: still a race
}
}
printf("\nPi = %lf\n\n", sum);
}
|
Use the sumLock variable to synchronize the updates:
/*
 * Parallel pi computation: cyclic work distribution, with updates to the
 * shared accumulator serialized by an OpenMP lock.
 * NOTE(teaching step): the lock is taken on EVERY iteration, so the threads
 * serialize on each update -- the next slide removes this bottleneck.
 */
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
omp_lock_t sumLock; // Create lock variable
omp_init_lock(&sumLock); // Initialize lock variable
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double x;
for (int i = id; i < N; i = i + nThreads)
{
x = w*(i + 0.5);
omp_set_lock(&sumLock); // Lock the mutex
sum = sum + w*f(x); // Update to shared variable
omp_unset_lock(&sumLock); // Unlock the mutex
}
}
omp_destroy_lock(&sumLock); // Release the lock's resources (was missing)
printf("\nPi = %lf\n\n", sum);
}
|
DEMO: demo/OpenMP/openMP-compute-pi2.c --- has a synchronization bottleneck
Use a private variable mySum to avoid the synchronization bottleneck:
/*
 * Parallel pi computation without the per-iteration lock bottleneck:
 * each thread accumulates into a private mySum, and the lock is taken
 * only ONCE per thread to fold mySum into the shared sum.
 */
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
omp_lock_t sumLock; // Create lock variable
omp_init_lock(&sumLock); // Initialize lock variable
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double mySum = 0; // Private per-thread partial sum
double x;
for (int i = id; i < N; i = i + nThreads)
{
x = w*(i + 0.5);
mySum = mySum + w*f(x); // Update to private variable -- no synchronization needed
}
omp_set_lock(&sumLock); // Lock the mutex (once per thread, not per iteration)
sum = sum + mySum; // Update to shared variable
omp_unset_lock(&sumLock); // Unlock the mutex
}
omp_destroy_lock(&sumLock); // Release the lock's resources (was missing)
printf("\nPi = %lf\n\n", sum);
}
|
DEMO: demo/OpenMP/openMP-compute-pi3.c
Alternate way to synchronize updates in OpenMP: the critical region
/*
 * Parallel pi computation using a critical region instead of an explicit
 * lock: #pragma omp critical lets at most one thread at a time execute
 * the structured block that follows it.
 */
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double mySum = 0; // Private per-thread partial sum
double x;
for (int i = id; i < N; i = i + nThreads)
{
x = w*(i + 0.5);
mySum = mySum + w*f(x); // Update to private variable
}
#pragma omp critical
{
sum = sum + mySum; // Update to shared variable -- one thread at a time
}
}
printf("\nPi = %lf\n\n", sum);
}
|
Statements inside a critical region are executed by at most one (1) thread at a time:
/*
 * Parallel pi computation with the final, correct critical-region syntax.
 * Fixes the original pragma: it must be "#pragma omp critical" (the "omp"
 * was missing), and the structured block's "{" must start on the line
 * AFTER the pragma -- a pragma occupies its entire source line.
 */
int main(int argc, char *argv[])
{
int N = ....; // Assume N is initialized
double sum;   // Accumulator for the pi approximation
double w;     // Rectangle width: 1/N
w = 1.0/(double) N;
sum = 0.0; // Shared variable among the threads
#pragma omp parallel
{
int id = omp_get_thread_num(); // Thread's ID
int nThreads = omp_get_num_threads();
double mySum = 0; // Private per-thread partial sum
double x;
for (int i = id; i < N; i = i + nThreads)
{
x = w*(i + 0.5);
mySum = mySum + w*f(x); // Update to private variable
}
#pragma omp critical
{
sum = sum + mySum; // Update to shared variable -- at most one thread at a time
}
}
printf("\nPi = %lf\n\n", sum);
}
|
DEMO: demo/OpenMP/openMP-compute-pi4.c