See: click here
|
However, not all access operations are conflicting
|
|
                    Thread 1 on            Thread 2 on
Memory              CPU 1                  CPU 2
==============      ===================    =================
N = 1234
                    Read N --> 1234
                    Add 1  --> 1235        Read N --> 1234
N = 1235            Write N
                                           Add 1  --> 1235
N = 1235                                   Write N
|
|
|
|
|
These are the most useful synchronization techniques for parallel numerical programs that need to update shared variables
|
|
pthread_mutex_t x; |
int pthread_mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr ); |
This kind of mutex lock is created using the (default) attribute NULL
Example:
Example: a mutex with an initial
unlocked state
pthread_mutex_t x; /* Define a mutex lock "x" */ pthread_mutex_init(&x, NULL); /* Initialize "x" */ |
int pthread_mutex_lock(pthread_mutex_t *mutex); |
Example:
pthread_mutex_t x; pthread_mutex_init(&x, NULL); ... pthread_mutex_lock(&x); |
int pthread_mutex_unlock(pthread_mutex_t *mutex); |
Example:
pthread_mutex_unlock(&x); |
#include <pthread.h>
int N;                 // Counter that every worker thread adds to
pthread_t tid[100];    // Thread IDs, filled in by main()

/*
 * worker: the function each thread executes.
 *
 * Adds 1 to the global N 10000 times, prints a message and returns.
 *
 *   arg     - unused
 *   returns - always NULL
 *
 * The update "N = N + 1" is performed WITHOUT any lock.  When several
 * threads run this function at the same time, one thread can read N
 * before another thread has written its new value back, so increments
 * can be lost.
 */
void *worker(void *arg)
{
   int i;

   for (i = 0; i < 10000; i = i + 1)
   {
      N = N + 1;       // Unsynchronized read-modify-write of the shared N
   }

   cout << "Added 10000 to N" << endl;

   return(NULL);       /* Thread exits (dies) */
}
/* =======================
MAIN
======================= */
/*
 * main: start argv[1] worker threads, wait for all of them and print N.
 *
 *   argv[1] - number of worker threads to create (at most the number of
 *             elements in tid[])
 *
 * Exits with status 1 if the argument is missing, if it is larger than
 * tid[] can hold, or if a thread cannot be created; otherwise exits with 0.
 */
int main(int argc, char *argv[])
{
   int i, num_threads;
   int max_threads = (int) (sizeof(tid) / sizeof(tid[0]));

   if (argc < 2)
   {
      cout << "Usage: " << argv[0] << " num_threads" << endl;
      exit(1);
   }

   num_threads = atoi(argv[1]);

   // tid[] has a fixed size: refuse a count that would write past its end
   if (num_threads > max_threads)
   {
      cout << "Cannot use more than " << max_threads << " threads" << endl;
      exit(1);
   }

   // The counter must be reset BEFORE any worker exists; resetting it after
   // pthread_create() could throw away increments a worker has already made
   N = 0;

   /* ------
      Create threads
      ------ */
   for (i = 0; i < num_threads; i = i + 1)
   {
      if ( pthread_create(&tid[i], NULL, worker, NULL) )
      {
         cout << "Cannot create thread" << endl;
         exit(1);
      }
   }

   // Wait for all threads to terminate
   for (i = 0; i < num_threads; i = i + 1)
   {
      pthread_join(tid[i], NULL);
   }

   cout << "N = " << N << endl << endl;

   exit(0);
}
|
Whenever a thread wants to update a shared variable, it must enclose the update operation between a "lock - unlock" pair.
Example:
int N;                     // SHARED variable
pthread_mutex_t N_mutex;   // Mutex controlling access to N

/*
 * worker: thread body that adds 1 to the shared N 10000 times.
 *
 * Every single update is enclosed in a lock/unlock pair on N_mutex, so
 * only one thread at a time executes "N = N + 1".
 *
 *   arg     - unused
 *   returns - always NULL
 *
 * N_mutex must have been initialized (pthread_mutex_init) before any
 * thread runs this function.
 */
void *worker(void *arg)
{
   int i;

   for (i = 0; i < 10000; i = i + 1)
   {
      pthread_mutex_lock(&N_mutex);
      N = N + 1;
      pthread_mutex_unlock(&N_mutex);
   }

   // The function is declared to return void *, so it must return a value
   return(NULL);
}
|
|
Compare the behavior of this program with the one that does not use MUTEX to control access to N: click here
|
Integrate( f(x) = 2.0 / sqrt(1 - x*x) , x = 0 to x = 1 ) |
Maple:> integrate(2.0 / sqrt(1 - x*x), x=0..1); 3.141592654 |
Example:
/*
 * f: the function being integrated, 2 / sqrt(1 - a^2).
 *
 *   a       - point at which to evaluate the function
 *   returns - 2.0 divided by the square root of (1 - a*a)
 */
double f(double a)
{
   double radicand = 1 - a*a;

   return 2.0 / sqrt(radicand);
}
int main(int argc, char *argv[])
{
int i;
int N;
double sum;
double x, w;
N = ...; // Will determine the accuracy
// of the approximation
w = 1.0/N; // Width of interval
sum = 0.0;
for (i = 1; i <= N; i = i + 1)
{
x = w*(i - 0.5); // Get the middle of the interval
sum = sum + w*f(x); // Sum the area...
}
cout << sum;
}
|
Compile with:
Run the program with:
/*
 * f: integrand used by the approximation, 2 / sqrt(1 - a^2).
 *
 *   a       - evaluation point
 *   returns - 2.0 / sqrt(1 - a*a)
 */
double f(double a)
{
   const double one_minus_square = 1 - a*a;
   const double root = sqrt(one_minus_square);

   return 2.0 / root;
}
int main(int argc, char *argv[])
{
int i;
int N;
double sum;
double x, w;
N = ...; // Will determine the accuracy of approximation
w = 1.0/N;
sum = 0.0;
for (i = 0; i < N; i = i + 1)
{
x = w*(i + 0.5); // We can make x non-shared..
sum = sum + w*f(x); // sum is SHARED !!!
}
cout << sum;
}
|
Observation:
|
|
|
|
We will choose the interleaved labor division for the ease of programming
/*** Shared variables, set by main() and not updated by the workers ***/
int N;                       // # intervals
double w;                    // width of one interval
int num_threads;             // # threads

/*** Shared variables, updated by the workers !!! ***/
double sum;
pthread_mutex_t sum_mutex;   // Mutex to control access to sum

/*
 * main: set up the shared variables, start num_threads PIworker threads,
 * wait for all of them to finish and print sum.
 *
 * Thread i is given the start index i (0 <= i < num_threads).  PIworker
 * begins at its start index and advances by num_threads, so together the
 * threads cover the indices 0 .. N-1 exactly once.
 *
 * Exits with status 1 if num_threads does not fit the fixed-size arrays
 * or if a thread cannot be created.
 */
int main(int argc, char *argv[])
{
   int Start[100];           // Start index values for each thread
   pthread_t tid[100];       // Used for pthread_join()
   int i;
   int max_threads = (int) (sizeof(tid) / sizeof(tid[0]));

   N = ...;                  // Read N in from keyboard...
   w = 1.0/N;                // "Broadcast" w
   num_threads = ...;        // Skip distance for each thread

   // Start[] and tid[] have a fixed size: refuse a thread count that
   // would index past their end
   if (num_threads < 1 || num_threads > max_threads)
   {
      cout << "Number of threads must be between 1 and " << max_threads << endl;
      exit(1);
   }

   sum = 0.0;                               // Initialize shared variable
   pthread_mutex_init(&sum_mutex, NULL);    // Init mutex

   /**** Make worker threads... ****/
   for (i = 0; i < num_threads; i = i + 1)
   {
      // Every thread gets a pointer to its own array element, so the value
      // it reads does not change when i moves on to the next thread
      Start[i] = i;                         // Start index for thread i

      if ( pthread_create(&tid[i], NULL, PIworker, &Start[i]) )
      {
         cout << "Cannot create thread" << endl;
         exit(1);
      }
   }

   /**** Wait for worker threads to finish... ****/
   for (i = 0; i < num_threads; i = i + 1)
   {
      pthread_join(tid[i], NULL);
   }

   cout << sum;
}
|
Worker thread:
/*
 * PIworker: thread body that adds its share of the terms to the shared sum.
 *
 *   arg     - pointer to an int holding this thread's starting index
 *   returns - always NULL
 *
 * Handles the indices start, start + num_threads, start + 2*num_threads, ...
 * that are below N.  sum_mutex is locked and unlocked around EVERY
 * individual addition to sum.
 */
void *PIworker(void *arg)
{
   /*** The parameter is my starting index ***/
   const int start = * (int *) arg;
   int k;

   /*** Visit every "num_threads"-th item, beginning at my start ***/
   for (k = start; k < N; k += num_threads)
   {
      const double x = w * (k + 0.5);     // middle of interval k

      pthread_mutex_lock(&sum_mutex);
      sum += w*f(x);                      // Add to the shared sum
      pthread_mutex_unlock(&sum_mutex);
   }

   return NULL;                           /* Thread exits (dies) */
}
|
The parallel version is slower than the sequential version !!!
|
|
Worker thread:
/*
 * PIworker: thread body that adds its share of the terms to the shared sum,
 * synchronizing only once.
 *
 *   arg     - pointer to an int holding this thread's starting index
 *   returns - always NULL
 *
 * The terms for the indices myStart, myStart + num_threads, ... below N are
 * first added up in the local variable tmp_sum, which no other thread can
 * access.  Only the final addition of tmp_sum to the shared sum is done
 * while holding sum_mutex.
 */
void *PIworker(void *arg)
{
   int i, myStart;
   double x;
   double tmp_sum = 0.0;   // Must start at 0: it is read by the first addition

   /*** Get the parameter (which is my starting index) ***/
   myStart = * (int *) arg;

   /*** Compute sum, skipping every "num_threads" items ***/
   for (i = myStart; i < N; i = i + num_threads)
   {
      x = w * ((double) i + 0.5);       // next x
      tmp_sum = tmp_sum + w*f(x);       // No mutex lock needed !
   }

   pthread_mutex_lock(&sum_mutex);
   sum = sum + tmp_sum;                 // Synch only ONCE !!!
   pthread_mutex_unlock(&sum_mutex);

   return(NULL);                        /* Thread exits (dies) */
}
|
What a difference it can make where you put the synchronization points in a parallel program....
|
|
|
pthread_rwlock_t x; |
int pthread_rwlock_init(pthread_rwlock_t *rwlock, pthread_rwlockattr_t *attr ); |
The most common read/write lock is one where the lock is initially in the unlocked state.
This kind of read/write lock is created using the (default) attribute NULL:
Example: a read/write lock with an initial
unlocked state
pthread_rwlock_init(&x, NULL); /* Default initialization */ |
int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock); |
Example:
pthread_rwlock_t x; pthread_rwlock_init(&x, NULL); ... pthread_rwlock_rdlock(&x); |
int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock); |
Example:
pthread_rwlock_t x; pthread_rwlock_init(&x, NULL); ... pthread_rwlock_wrlock(&x); |