main()
{
set up for parallel execution (sequential)
parallel computational section
gather result and
set up for next parallel execution (sequential)
parallel computational section
and so on...
}
|
void *worker (void *arg)
{
....
}
int main(int argc, char *argv[])
{
// SEQUENTIAL Section
.... Prepare problem (setup shared variables) ....
// PARALLEL Section
// Start workers
for (i = 0; i < NUM_PROCESSORS; i = i + 1)
{
param[i] = ....;
pthread_create(&tid[i], &attr, worker, &param[i]);
}
// Wait for all workers to finish
for (i = 0; i < NUM_PROCESSORS; i = i + 1)
pthread_join(tid[i], NULL);
// SEQUENTIAL Section
.... Gather result (post-processing) and set up
variables for next parallel section
and so on....
}
|
#define N_Items 1000000
double x[N_Items]; // Input array
double min;
int i;
min = x[0];
for ( i = 1; i < N_Items; i = i + 1 )
if ( x[i] < min )
min = x[i];
|
|
(For simplicity of discussion, I used 2 threads)
|
start[0]                start[1]
|                       |
|  values handled by    |  values handled by
V  thread 0             V  thread 1
|<--------------------->|<--------------------->|
            |                       |
            |                       |
            V                       V
          min[0]                  min[1]
              \                   /
               \                 /
                \               /
                 \             /
                  \           /
                   main thread
                        |
                        |
                        V
                  Actual minimum
|
start[0] = 0 * N_Items/N_Threads;
start[1] = 1 * N_Items/N_Threads;
start[2] = 2 * N_Items/N_Threads;
.... ^
|
|
Pass this "ID" to a thread so the thread
can determine its share of labor
|
Program - Main Thread:
// Main-thread excerpt: start one worker per thread ID, wait for all of them,
// then reduce the per-thread minima in min[] to a single value (my_min).
// NOTE: the declarations of i, tid, my_min and worker are not part of this
// excerpt.
#define N_Items 1000000
/* Shared Variables */
double x[N_Items]; // Must be SHARED (read by every worker thread)
int start[100]; // Holds the thread ID handed to each worker (start[i] = i below)
double min[100]; // min[i] receives the minimum found by worker i
int num_threads; // Number of workers; start[] and min[] have 100 slots, so keep it <= 100
// -----------------------------------
// Create worker threads....
// -----------------------------------
for (i = 0; i < num_threads; i = i + 1)
{
start[i] = i; // Each worker gets its own slot, so its ID is not overwritten by later iterations
// A non-zero return value from pthread_create is treated as failure
if ( pthread_create(&tid[i], NULL, worker, (void *)&start[i]) )
{
cout << "Cannot create thread" << endl;
exit(1);
}
}
// -----------------------------------
// Wait for worker threads to end....
// -----------------------------------
// The return value of pthread_join is not checked here
for (i = 0; i < num_threads; i = i + 1)
pthread_join(tid[i], NULL);
// ----------------------------------------
// Post processing: Find actual minimum
// ----------------------------------------
// Runs after all joins: sequentially take the smallest of min[0..num_threads-1]
my_min = min[0];
for (i = 1; i < num_threads; i++)
if ( min[i] < my_min )
my_min = min[i];
|
Worker Thread:
/*
 * worker - thread body for the block-partitioned minimum search.
 *
 * arg points to an int holding this thread's ID (0, 1, 2, ...).
 * Thread `id` scans the elements x[id*chunk] .. x[id*chunk + chunk - 1],
 * where chunk = N_Items/num_threads; the thread with the highest ID
 * scans up to the end of the array instead, so the elements left over
 * by the integer division are covered too.
 *
 * The smallest value found is stored in min[id].  Always returns NULL.
 */
void *worker(void *arg)
{
    /* Nominal number of elements each thread handles */
    const int chunk = N_Items / num_threads;

    /* Thread ID supplied by the creator */
    const int id = *(int *) arg;

    /* Half-open index range [first, last) scanned by this thread */
    const int first = id * chunk;
    const int last  = (id == num_threads - 1) ? N_Items : first + chunk;

    /* Seed with the first element of the range, then scan the rest */
    double lowest = x[first];
    int k = first + 1;

    while (k < last)
    {
        if (x[k] < lowest)
        {
            lowest = x[k];
        }
        k++;
    }

    min[id] = lowest;   /* Each thread writes only its own slot */

    return NULL;        /* Thread ends here */
}
|
Compile with: CC -mt min-mt1.C
(Again, for simplicity of discussion, I used 2 threads)
|
Graphically:
values handled by thread 0
| | | | | | | | | | | | | |
V V V V V V V V V V V V V V
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
| | | | | | | | | | | | | |
values handled by thread 1
Thread 0 Thread 1
| |
| |
V V
min[0] min[1]
\ /
\ /
\ /
\ /
\ /
main thread
|
|
V
Actual minimum
|
elements processed by thread[0]: 0, 0+N_Threads, 0+2*N_Threads ...
elements processed by thread[1]: 1, 1+N_Threads, 1+2*N_Threads ...
elements processed by thread[2]: 2, 2+N_Threads, 2+2*N_Threads ...
.... ^
|
|
Pass this "ID" to a thread so the thread
can determine its share of labor
|
Program - Main Thread: (UNCHANGED)
// Main-thread excerpt: start one worker per thread ID, wait for all of them,
// then reduce the per-thread minima in min[] to a single value (my_min).
// NOTE: the declarations of i, tid, start, min, num_threads, my_min and
// worker are not part of this excerpt.
// -----------------------------------
// Create worker threads....
// -----------------------------------
for (i = 0; i < num_threads; i = i + 1)
{
start[i] = i; // Each worker gets its own slot, so its ID is not overwritten by later iterations
// A non-zero return value from pthread_create is treated as failure
if ( pthread_create(&tid[i], NULL, worker, (void *)&start[i]) )
{
cout << "Cannot create thread" << endl;
exit(1);
}
}
// -----------------------------------
// Wait for worker threads to end....
// -----------------------------------
// The return value of pthread_join is not checked here
for (i = 0; i < num_threads; i = i + 1)
pthread_join(tid[i], NULL);
// ----------------------------------------
// Post processing: Find actual minimum
// ----------------------------------------
// Runs after all joins: sequentially take the smallest of min[0..num_threads-1]
my_min = min[0];
for (i = 1; i < num_threads; i++)
if ( min[i] < my_min )
my_min = min[i];
|
Worker Thread: changed !!!
/*
 * worker - thread body for the interleaved minimum search.
 *
 * arg points to an int holding this thread's ID (0, 1, 2, ...).
 * Thread `id` examines x[id], x[id + num_threads],
 * x[id + 2*num_threads], ... for as long as the index stays below
 * N_Items.
 *
 * The smallest value found is stored in min[id].  Always returns NULL.
 */
void *worker(void *arg)
{
    /* Thread ID supplied by the creator */
    const int id = *(int *) arg;

    /* Seed with this thread's first element */
    double lowest = x[id];

    /* Visit every num_threads-th element after the first one */
    int k = id + num_threads;

    while (k < N_Items)
    {
        if (x[k] < lowest)
        {
            lowest = x[k];
        }
        k += num_threads;
    }

    min[id] = lowest;   /* Each thread writes only its own slot */

    return NULL;        /* Thread ends here */
}
|
Compile with: CC -mt min-mt2.C
Rather:
|
|
|
|
Graphically:
|
|
start[0] start[1]
| |
| values handled by | values handled by
V thread 0 V thread 1
|---------------------->|---------------------->|
Page access pattern:
|----|----|----|... |----|----|----|...
^ ^
| |
|
Conclusion:
|
values handled by thread 0
| | | | | | | | | | | | | |
V V V V V V V V V V V V V V
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
| | | | | | | | | | | | | |
values handled by thread 1
Page access pattern:
|
V
|-------|-------|-------|...
^
|
|
Conclusion:
|
|