|
Assignment
- Possible static assignment: block row decomposition
  - Process 0 gets rows 0 to (n/p)-1, process 1 gets rows n/p to (2n/p)-1, etc.
- Another static assignment: cyclic row decomposition
  - Process 0 gets rows 0, p, 2p, …; process 1 gets rows 1, p+1, 2p+1, …
- Dynamic assignment
  - Each process grabs the next available row, works on it, then grabs another row, and so on.
- Static block row assignment minimizes nearest-neighbor communication by assigning contiguous rows to the same process.
Shared memory version
/* include files */
MAIN_ENV;

/* P: number of processes that run Solve; n: grid dimension.
   Both are read from the command line in main. */
int P;
int n;

/* Worker routine executed by every process, including the one running main.
   Declared with a full prototype so it matches its definition. */
void Solve (void);

/* State reached through the global pointer gm, which main allocates with
   G_MALLOC so that all processes operate on the same instance. */
struct gm_t {
    LOCKDEC (diff_lock);    /* held while a process adds its contribution to diff */
    BARDEC (barrier);       /* barrier that all P processes pass through in Solve */
    float **A;              /* n+2 row pointers, each row holding n+2 floats */
    float diff;             /* sum of absolute element changes for the current sweep */
} *gm;
/*
 * Program entry point.
 *
 * Command line:
 *   argv[1]  n  - grid dimension; the grid is allocated as (n+2) x (n+2) floats
 *   argv[2]  P  - total number of processes that execute Solve
 *
 * Allocates the global state and the grid, initializes the grid, creates
 * P-1 additional processes running Solve, runs Solve in this process as
 * well, and then waits for the created processes to finish.
 *
 * Returns 1 without starting any work when the arguments are missing or
 * not positive.
 */
int main (int argc, char **argv)
{
    int i;

    /* Validate before touching the parallel environment: argv[1] and argv[2]
       must exist, and P must be non-zero because Solve divides by it. */
    if (argc < 3) {
        return 1;
    }
    n = atoi (argv[1]);
    P = atoi (argv[2]);
    if (n <= 0 || P <= 0) {
        return 1;
    }

    MAIN_INITENV;

    gm = (struct gm_t*) G_MALLOC (sizeof (struct gm_t));
    LOCKINIT (gm->diff_lock);
    BARINIT (gm->barrier);

    /* Grid of n+2 rows by n+2 columns, one separately allocated row at a time. */
    gm->A = (float**) G_MALLOC ((n+2)*sizeof (float*));
    for (i = 0; i < n+2; i++) {
        gm->A[i] = (float*) G_MALLOC ((n+2)*sizeof (float));
    }
    Initialize (gm->A);

    /* Start at 1: this process is itself the first of the P workers. */
    for (i = 1; i < P; i++) {
        CREATE (Solve);
    }
    Solve ();
    WAIT_FOR_END (P-1);
    MAIN_END;
}
/*
 * Iteratively relax the grid gm->A until the average per-element change of
 * a sweep drops below TOL.
 *
 * Every one of the P processes calls this routine. Each process updates a
 * contiguous block of interior rows (block row decomposition); rows are
 * numbered 1..n inside the (n+2) x (n+2) grid, so indices 0 and n+1 are
 * only ever read as neighbours and never written here. When n is not a
 * multiple of P, the first n % P processes take one extra row so that no
 * interior row is left out.
 *
 * Each element is replaced in place by 0.2 times the sum of itself and its
 * four neighbours. Rows belonging to neighbouring processes are read
 * without additional synchronization within a sweep.
 */
void Solve (void)
{
    int i, j, pid, done = 0;
    int rows, extra, first, last;
    float temp, local_diff;

    GET_PID (pid);

    /* Rows [first, last) belong to this process; the +1 skips border row 0. */
    rows = n / P;
    extra = n % P;
    first = 1 + pid * rows + (pid < extra ? pid : extra);
    last = first + rows + (pid < extra ? 1 : 0);

    while (!done) {
        local_diff = 0.0;
        if (!pid) gm->diff = 0.0;
        /* Nobody may add to gm->diff until process 0 has reset it. */
        BARRIER (gm->barrier, P);

        for (i = first; i < last; i++) {
            /* Columns 1..n: j-1 and j+1 then stay within 0..n+1. */
            for (j = 1; j <= n; j++) {
                temp = gm->A[i][j];
                gm->A[i][j] = 0.2*(gm->A[i][j] + gm->A[i][j-1] + gm->A[i][j+1]
                                   + gm->A[i+1][j] + gm->A[i-1][j]);
                local_diff += fabs (gm->A[i][j] - temp);
            } /* end for */
        } /* end for */

        /* Accumulate privately, then add to the shared sum once per sweep. */
        LOCK (gm->diff_lock);
        gm->diff += local_diff;
        UNLOCK (gm->diff_lock);

        /* All contributions must be in before anyone tests convergence. */
        BARRIER (gm->barrier, P);
        if (gm->diff/(n*n) < TOL) done = 1;
        /* Every process must have read gm->diff before process 0 loops
           around and resets it; otherwise processes could disagree on done. */
        BARRIER (gm->barrier, P);
    } /* end while */
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|