main()
   {
      set up for parallel execution (sequential)
      parallel computational section
      gather result and 
      set up for next parallel execution (sequential)
      parallel computational section
      and so on...
   }
 |  
 void *worker (void *arg)
 {
     ....
 }
 int main(int argc, char *argv[])
 {
    // SEQUENTIAL Section 
    .... Prepare problem (setup shared variables) ....
    // PARALLEL Section 
    // Start workers
    for (i = 0; i < NUM_PROCESSORS; i = i + 1)
    {
       param[i] = ....;
       pthread_create(&tid[i], &attr, worker, &param[i]);
    }
 
    // Wait for all workers to finish
    for (i = 0; i < NUM_PROCESSORS; i = i + 1)
        pthread_join(tid[i], NULL);
    // SEQUENTIAL Section 
    .... Gather result (post-processing) and set up
         variables for next parallel section
    and so on....
 }
 |  
   #define N_Items   1000000
   double x[N_Items];   	// Input array
   double min;          // Smallest value seen so far
   int i;
   // Sequential baseline: seed the running minimum with the first element,
   // then compare it against every remaining element of x[].
   min = x[0];
   for ( i = 1; i < N_Items; i = i + 1 )
      if ( x[i] < min )
	 min = x[i];
 |  
        
   
 
  |  
(For simplicity of discussion, I used 2 threads)
  
  |  
  start[0]                start[1]
   |                       |
   |  values handled by    |   values handled by
   V     thread 0          V       thread 1
   |<--------------------->|<--------------------->|     
              |                        |
              |                        |
              V                        V
            min[0]                  min[1]
               \                      /
                \                    /
                 \                  /
                  \                /
                   \              /
		      main thread
			   |
			   |
			   V
		      Actual minimum
 |  
   start[0] = 0 * N_Items/N_Threads;
   start[1] = 1 * N_Items/N_Threads;
   start[2] = 2 * N_Items/N_Threads;
   ....  ^
	 |
	 |
      Pass this "ID" to a thread so the thread
      can determine its share of labor
 |  
Program - Main Thread:
   #define N_Items   1000000
   /* Shared Variables */
   double x[N_Items];   // Must be SHARED (accessed by worker threads !!) 
   int    start[100];   // Slot i holds the ID (0, 1, 2, ...) handed to thread i;
                        // the worker derives its own index range from that ID
   double min[100];     // Slot i receives the minimum found by thread i
   int    num_threads;  // Number of worker threads; start[] and min[] have
                        // 100 slots, so this must not exceed 100
   // -----------------------------------
   // Create worker threads....
   // -----------------------------------
   for (i = 0; i < num_threads; i = i + 1)
   {
      // Each thread gets a pointer to its own start[i] slot, so the value it
      // reads does not change when the loop variable i moves on.
      start[i] = i;     // Pass ID to thread in a private variable
      // A non-zero return from pthread_create is treated as failure.
      if ( pthread_create(&tid[i], NULL, worker, (void *)&start[i]) )
      {
         cout << "Cannot create thread" << endl;
         exit(1);
      }
   }
   // -----------------------------------
   // Wait for worker threads to end....
   // -----------------------------------
   for (i = 0; i < num_threads; i = i + 1)
      pthread_join(tid[i], NULL);
   // ----------------------------------------
   // Post processing: Find actual minimum
   // (every worker has been joined, so each min[i] has been stored)
   // ----------------------------------------
   my_min = min[0];
   for (i = 1; i < num_threads; i++)
      if ( min[i] < my_min )
         my_min = min[i];
 |  
Worker Thread:
  /* ------------------------------------------------------------------
     Worker thread (block partitioning).

     arg points to an int holding this thread's ID (0, 1, 2, ...).
     The thread scans one contiguous slice of the shared array x[] and
     stores the smallest value it finds in min[ID].  Always returns NULL;
     the result is delivered through min[].
     ------------------------------------------------------------------ */
  void *worker(void *arg)
  {
     /* Nominal number of elements per thread */
     const int chunk = N_Items / num_threads;

     /* Thread ID handed over by the main thread */
     const int id = *(int *) arg;

     /* First index of this thread's slice */
     const int first = id * chunk;

     /* One past the last index of the slice: the last thread runs to the
        end of the array so the leftover elements are not skipped */
     const int last = (id == num_threads - 1) ? N_Items : first + chunk;

     /* Seed with the first element of the slice, then scan the rest */
     double lowest = x[first];
     int k = first + 1;

     while (k < last)
     {
        if (x[k] < lowest)
           lowest = x[k];
        k++;
     }

     min[id] = lowest;      /* Publish result in this thread's own slot */
     return NULL;           /* Thread ends here */
  }
 |  
        
   
Compile with: CC -mt min-mt1.C
(Again, for simplicity of discussion, I used 2 threads)
  
  |  
Graphically:
                values handled by thread 0
   |   |   |   |   |   |   |   |   |   |   |   |   |   |
   V   V   V   V   V   V   V   V   V   V   V   V   V   V
   |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|      
     ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^
     |   |   |   |   |   |   |   |   |   |   |   |   |   |
                values handled by thread 1
          Thread 0                Thread 1
              |                        |
              |                        |
              V                        V
            min[0]                  min[1]
               \                      /
                \                    /
                 \                  /
                  \                /
                   \              /
		      main thread
			   |
			   |
			   V
		      Actual minimum
 |  
   elements processed by thread[0]: 0, 0+N_Threads, 0+2*N_Threads ...
   elements processed by thread[1]: 1, 1+N_Threads, 1+2*N_Threads ...
   elements processed by thread[2]: 2, 2+N_Threads, 2+2*N_Threads ...
   ....                         ^
                                |
                                |
                        Pass this "ID" to a thread so the thread
                        can determine its share of labor
 |  
Program - Main Thread: (UNCHANGED)
   // -----------------------------------
   // Create worker threads....
   // -----------------------------------
   for (i = 0; i < num_threads; i = i + 1)
   {
      // Each thread gets a pointer to its own start[i] slot, so the value it
      // reads does not change when the loop variable i moves on.
      start[i] = i;     // Pass ID to thread in a private variable
      // A non-zero return from pthread_create is treated as failure.
      if ( pthread_create(&tid[i], NULL, worker, (void *)&start[i]) )
      {
         cout << "Cannot create thread" << endl;
         exit(1);
      }
   }
   // -----------------------------------
   // Wait for worker threads to end....
   // -----------------------------------
   for (i = 0; i < num_threads; i = i + 1)
      pthread_join(tid[i], NULL);
   // ----------------------------------------
   // Post processing: Find actual minimum
   // (every worker has been joined, so each min[i] has been stored)
   // ----------------------------------------
   my_min = min[0];
   for (i = 1; i < num_threads; i++)
      if ( min[i] < my_min )
         my_min = min[i];
 |  
Worker Thread: changed !!!
  // ------------------------------------------------------------------
  // Worker thread (interleaved partitioning).
  //
  // arg points to an int holding this thread's ID (0, 1, 2, ...).
  // The thread examines x[ID], x[ID + num_threads], x[ID + 2*num_threads],
  // ... and stores the smallest of those values in min[ID].
  // Always returns NULL; the result is delivered through min[].
  // ------------------------------------------------------------------
  void *worker(void *arg)
  {
     // Thread ID; it is also the index of the first element to examine
     const int id = *(int *) arg;

     // Seed with the first element, then step through the array in
     // strides of num_threads
     double lowest = x[id];
     int k = id + num_threads;

     while (k < N_Items)
     {
        if (x[k] < lowest)
           lowest = x[k];
        k += num_threads;
     }

     min[id] = lowest;      // Publish result in this thread's own slot
     return NULL;           // Thread ends here
  }
 |  
        
   
Compile with: CC -mt min-mt2.C
Rather:
 
  |  
 
  |  
 
  |  
 
  |  
Graphically:
 
 |  
 
  |  
  start[0]                start[1]
   |                       |
   |  values handled by    |   values handled by
   V     thread 0          V       thread 1
   |---------------------->|---------------------->|   
Page access pattern:
   |----|----|----|...     |----|----|----|...
      ^                            ^
      |                            |
    
 |  
Conclusion:
 
  |  
                values handled by thread 0
   |   |   |   |   |   |   |   |   |   |   |   |   |   |
   V   V   V   V   V   V   V   V   V   V   V   V   V   V
   |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
     ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^   ^
     |   |   |   |   |   |   |   |   |   |   |   |   |   |
                values handled by thread 1
Page access pattern:
       |
       V
   |-------|-------|-------|... 
                       ^
		       |
 |  
Conclusion:
 
  |  
 
  |