#pragma omp critical
{
...
... Update shared variables
...
}
|
double f(double a)
{
return( 2.0 / sqrt(1 - a*a) );
}
// Sequential midpoint-rule approximation of pi:
// the integral of f(x) = 2/sqrt(1 - x^2) over [0, 1] equals pi.
// NOTE(review): `N = ...` is a pedagogical placeholder, and `cout`
// is C++ (needs <iostream>) — this snippet is illustrative, not compilable as-is.
int main(int argc, char *argv[])
{
int i;
int N;
double sum; // accumulates the pi approximation
double x, w;
N = ...; // accuracy of the approximation
w = 1.0/N; // width of each of the N subintervals
sum = 0.0;
// Midpoint rule: sample f at the center of each subinterval.
for (i = 1; i <= N; i = i + 1)
{
x = w*(i - 0.5); // midpoint of the i-th subinterval
sum = sum + w*f(x);
}
cout << sum;
}
|
Compile with:
Run the program with:
(We have seen this program before, so it is not explained again here.)
When we parallelize, it is important to know which UPDATES must be SYNCHRONIZED:
double f(double a)
{
return( 2.0 / sqrt(1 - a*a) );
}
// Parallel version 1: every iteration updates the shared `sum`
// inside an omp critical section, so the threads serialize on
// each update — correct, but slow.
// NOTE(review): `N = ...` is a placeholder and `cout` is C++;
// illustrative snippet, not compilable as-is.
int main(int argc, char *argv[])
{
int N;
double sum; // Shared variable, updated !
double x, w; // NOTE(review): this outer x is shadowed by the x inside the parallel region
N = ...; // accuracy of the approximation
w = 1.0/N; // subinterval width
sum = 0.0;
#pragma omp parallel
{
int i, num_threads; // Non-shared variables !!!
double x; // per-thread copy; shadows the shared x above
num_threads = omp_get_num_threads() ;
// Cyclic (round-robin) distribution: thread t handles iterations
// t, t + num_threads, t + 2*num_threads, ...
for (i = omp_get_thread_num(); i < N; i = i + num_threads)
{
x = w*(i + 0.5); // midpoint of subinterval i (0-based)
// Critical section on EVERY iteration: the synchronization
// cost dominates; see the next version for the fix.
#pragma omp critical
{
sum = sum + w*f(x);
}
}
}
cout << sum;
}
|
export OMP_NUM_THREADS=8
a.out 50000000
Change OMP_NUM_THREADS and see the difference in performance
double f(double a)
{
return( 2.0 / sqrt(1 - a*a) );
}
// Parallel version 2: each thread accumulates into its own private
// partial sum (`mypi`) and enters the critical section only ONCE,
// at the end — synchronization cost drops from O(N) to O(num_threads).
// NOTE(review): `N = ...` is a placeholder and `cout` is C++;
// illustrative snippet, not compilable as-is.
int main(int argc, char *argv[])
{
int N;
double sum; // Shared variable, updated !
double x, w; // NOTE(review): this outer x is shadowed inside the parallel region
N = ...; // accuracy of the approximation
w = 1.0/N; // subinterval width
sum = 0.0;
#pragma omp parallel
{
int i, num_threads;
double x; // per-thread copy
double mypi; // Private variable to reduce synchronization
num_threads = omp_get_num_threads() ;
mypi = 0.0;
// Cyclic distribution of iterations across threads, as before.
for (i = omp_get_thread_num(); i < N; i = i + num_threads)
{
x = w*(i + 0.5); // midpoint of subinterval i (0-based)
mypi = mypi + w*f(x); // no synchronization needed here
}
// One shared update per thread instead of one per iteration.
#pragma omp critical
{
sum = sum + mypi;
}
}
cout << sum;
}
|
export OMP_NUM_THREADS=8
a.out 50000000
Change OMP_NUM_THREADS and see the difference in performance