#pragma omp critical
{
    ... Update shared variables ...
}
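To see why such updates must be protected, here is a minimal sketch (not from the original notes; the variable names are made up): several threads increment a shared counter, and because counter = counter + 1 is a read-modify-write sequence, threads can interleave and lose increments unless the update is inside a critical section.

#include <iostream>
#include <omp.h>

using namespace std;

int main(int argc, char *argv[])
{
    long counter = 0;                 // shared variable

    #pragma omp parallel
    {
        for (int i = 0; i < 1000000; i++)
        {
            #pragma omp critical      // protect the read-modify-write update
            {
                counter = counter + 1;
            }
        }
    }

    // With the critical section, the result is exactly
    // 1000000 * (number of threads); without it, increments
    // are lost and the printed value is usually smaller.
    cout << counter;
}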
#include <iostream>
#include <cmath>

using namespace std;

double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

int main(int argc, char *argv[])
{
    int    i;
    int    N;
    double sum;
    double x, w;

    N = ...;             // accuracy of the approximation
    w = 1.0/N;

    sum = 0.0;
    for (i = 1; i <= N; i = i + 1)
    {
        x = w*(i - 0.5);
        sum = sum + w*f(x);
    }
    cout << sum;
}
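For reference (this derivation is not spelled out in the original notes): the loop is a midpoint-rule approximation of an integral whose exact value is π:

$$
\int_0^1 \frac{2}{\sqrt{1-x^2}}\,dx \;=\; \bigl[\,2\arcsin x\,\bigr]_0^1 \;=\; \pi,
\qquad
\sum_{i=1}^{N} w\,f\!\bigl(w(i - \tfrac{1}{2})\bigr) \;\approx\; \pi,
\quad w = \frac{1}{N}.
$$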
Compile with:
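(The command itself is missing from this copy of the notes. Assuming GCC and a source file named pi.cpp, both assumptions, it would be:)

g++ -fopenmp pi.cpp

(The -fopenmp flag is only strictly needed for the OpenMP versions below.)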
Run the program with:
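(The invocation is also missing here; presumably it is the same as in the runs shown further down, with N supplied on the command line:)

./a.out 50000000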
(We have seen this program before, so I will not explain it again.)
When we parallelize, it is important to know which UPDATES must be SYNCHRONIZED:
#include <iostream>
#include <cmath>
#include <omp.h>

using namespace std;

double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

int main(int argc, char *argv[])
{
    int    N;
    double sum;          // Shared variable, updated by every thread !
    double x, w;         // This x is shadowed by each thread's private x below

    N = ...;             // accuracy of the approximation
    w = 1.0/N;

    sum = 0.0;

    #pragma omp parallel
    {
        int    i, num_threads;   // Non-shared (private) variables !!!
        double x;                // each thread needs its own x

        num_threads = omp_get_num_threads();

        for (i = omp_get_thread_num(); i < N; i = i + num_threads)
        {
            x = w*(i + 0.5);

            #pragma omp critical     // only one thread at a time may update sum
            {
                sum = sum + w*f(x);
            }
        }
    }

    cout << sum;
}
export OMP_NUM_THREADS=8
./a.out 50000000
Change OMP_NUM_THREADS and see the difference in performance. Do not expect a good speedup yet: the critical section is entered once per loop iteration (N times in total), so the threads spend most of their time waiting for each other.
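One way to run the comparison (a sketch, assuming a Bourne-style shell and the file names used above):

for t in 1 2 4 8
do
    export OMP_NUM_THREADS=$t
    echo "=== $t threads ==="
    time ./a.out 50000000
done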
#include <iostream>
#include <cmath>
#include <omp.h>

using namespace std;

double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

int main(int argc, char *argv[])
{
    int    N;
    double sum;          // Shared variable, updated !
    double x, w;

    N = ...;             // accuracy of the approximation
    w = 1.0/N;

    sum = 0.0;

    #pragma omp parallel
    {
        int    i, num_threads;
        double x;
        double mypi;     // Private variable to reduce synchronization

        num_threads = omp_get_num_threads();

        mypi = 0.0;
        for (i = omp_get_thread_num(); i < N; i = i + num_threads)
        {
            x = w*(i + 0.5);
            mypi = mypi + w*f(x);    // accumulate locally, no locking needed
        }

        #pragma omp critical         // entered only ONCE per thread
        {
            sum = sum + mypi;
        }
    }

    cout << sum;
}
export OMP_NUM_THREADS=8
./a.out 50000000
Change OMP_NUM_THREADS and compare against the previous version: the critical section is now entered only once per thread instead of once per iteration, so this version scales far better.
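For completeness, a note that is not in the original notes: OpenMP's reduction clause automates exactly this per-thread partial-sum pattern. Each thread gets a private copy of sum, and the private copies are added into the shared sum when the parallel region ends. A minimal sketch (N hard-coded here for brevity):

#include <iostream>
#include <cmath>
#include <omp.h>

using namespace std;

double f(double a)
{
    return( 2.0 / sqrt(1 - a*a) );
}

int main(int argc, char *argv[])
{
    int    N   = 50000000;   // accuracy of the approximation
    double w   = 1.0/N;
    double sum = 0.0;

    // reduction(+:sum) gives each thread a private sum initialized to 0
    // and adds the private copies into the shared sum at the end
    #pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < N; i++)
    {
        double x = w*(i + 0.5);
        sum = sum + w*f(x);
    }

    cout << sum;
}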