Changeset 598 for soft/giet_vm/applications/ocean/multi.C
- Timestamp:
- Jul 9, 2015, 2:11:17 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/ocean/multi.C
r589 r598 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 2 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 16 17 /* Shared memory implementation of the multigrid method 18 Implementation uses red-black gauss-seidel relaxation 19 iterations, w cycles, and the method of half-injection for 20 residual computation. */ 21 22 EXTERN_ENV 23 24 #include <stdio.h> 25 #include <math.h> 26 #include <stdlib.h> 27 28 #include "decs.h" 29 30 /* perform multigrid (w cycles) */ 31 void multig(long my_id) 32 { 33 long iter; 34 double wu; 35 double errp; 36 long m; 37 long flag; 38 long k; 39 long my_num; 40 double wmax; 41 double local_err; 42 double red_local_err; 43 double black_local_err; 44 double g_error; 45 46 flag = 0; 47 iter = 0; 48 m = numlev - 1; 49 wmax = maxwork; 50 my_num = my_id; 51 wu = 0.0; 52 53 k = m; 54 g_error = 1.0e30; 55 while (!flag) { 56 errp = g_error; 57 iter++; 58 if (my_num == MASTER) { 59 multi->err_multi = 0.0; 60 } 61 62 /* barrier to make sure all procs have finished intadd or rescal */ 63 /* before proceeding with relaxation */ 64 #if defined(MULTIPLE_BARRIERS) 65 BARRIER(bars->error_barrier, nprocs) 66 #else 67 BARRIER(bars->barrier, nprocs) 68 #endif 69 copy_black(k, my_num); 70 71 relax(k, &red_local_err, RED_ITER, my_num); 72 73 /* barrier to make sure all red computations have been performed */ 74 #if defined(MULTIPLE_BARRIERS) 75 BARRIER(bars->error_barrier, nprocs) 76 #else 77 BARRIER(bars->barrier, nprocs) 78 #endif 79 copy_red(k, my_num); 80 81 relax(k, &black_local_err, BLACK_ITER, my_num); 82 83 /* compute max local error from red_local_err and black_local_err */ 84 85 if (red_local_err > black_local_err) { 86 local_err = red_local_err; 87 } else { 88 local_err = black_local_err; 89 } 90 91 /* update the global error if necessary */ 92 93 LOCK(locks->error_lock) 94 if (local_err > multi->err_multi) { 95 multi->err_multi = local_err; 96 } 97 UNLOCK(locks->error_lock) 98 99 /* a single relaxation sweep at the finest level is one unit of */ 100 /* work */ 101 wu += pow((double) 4.0, (double) k - m); 102 103 /* barrier to make sure all processors have checked local error */ 104 #if defined(MULTIPLE_BARRIERS) 105 BARRIER(bars->error_barrier, nprocs) 106 #else 107 BARRIER(bars->barrier, nprocs) 108 #endif 109 g_error = multi->err_multi; 110 111 /* barrier to make sure master does not cycle back to top of loop */ 112 /* and reset global->err before we read it and decide what to do */ 113 #if defined(MULTIPLE_BARRIERS) 114 BARRIER(bars->error_barrier, nprocs) 115 #else 116 BARRIER(bars->barrier, nprocs) 117 #endif 118 if (g_error >= lev_tol[k]) { 119 if (wu > wmax) { 120 /* max work exceeded */ 121 fprintf(stderr, "ERROR: Maximum work limit %0.5f exceeded\n", wmax); 122 exit(-1); 123 } else { 124 /* if we have not converged */ 125 if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) { 126 /* if need to go to coarser grid */ 127 128 copy_borders(k, my_num); 129 copy_rhs_borders(k, my_num); 130 131 /* This bar is needed because the routine rescal uses the neighbor's 132 border points to compute s4. We must ensure that the neighbor's 133 border points have been written before we try computing the new 134 rescal values */ 135 136 #if defined(MULTIPLE_BARRIERS) 137 BARRIER(bars->error_barrier, nprocs) 138 #else 139 BARRIER(bars->barrier, nprocs) 140 #endif 141 rescal(k, my_num); 142 143 /* transfer residual to rhs of coarser grid */ 144 lev_tol[k - 1] = 0.3 * g_error; 145 k = k - 1; 146 putz(k, my_num); 147 /* make initial guess on coarser grid zero */ 148 g_error = 1.0e30; 149 } 150 } 151 } else { 152 /* if we have converged at this level */ 153 if (k == m) { 154 /* if finest grid, we are done */ 155 flag = 1; 156 } else { 157 /* else go to next finest grid */ 158 159 copy_borders(k, my_num); 160 161 intadd(k, my_num); 162 /* changes the grid values at the finer level. rhs at finer level */ 163 /* remains what it already is */ 164 k++; 165 g_error = 1.0e30; 166 } 167 } 168 } 169 if (do_output) { 170 if (my_num == MASTER) { 171 printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter, k, multi->err_multi, wu); 172 } 173 } 174 } 175 176 /* perform red or black iteration (not both) */ 177 void relax(long k, double *err, long color, long my_num) 178 { 179 long i; 180 long j; 181 long iend; 182 long jend; 183 long oddistart; 184 long oddjstart; 185 long evenistart; 186 long evenjstart; 187 double a; 188 double h; 189 double factor; 190 double maxerr; 191 double newerr; 192 double oldval; 193 double newval; 194 double **t2a; 195 double **t2b; 196 double *t1a; 197 double *t1b; 198 double *t1c; 199 double *t1d; 200 201 i = 0; 202 j = 0; 203 204 *err = 0.0; 205 h = lev_res[k]; 206 207 /* points whose sum of row and col index is even do a red iteration, */ 208 /* others do a black */ 209 210 evenistart = gp[my_num].eist[k]; 211 evenjstart = gp[my_num].ejst[k]; 212 oddistart = gp[my_num].oist[k]; 213 oddjstart = gp[my_num].ojst[k]; 214 215 iend = gp[my_num].rlien[k]; 216 jend = gp[my_num].rljen[k]; 217 218 factor = 4.0 - eig2 * h * h; 219 maxerr = 0.0; 220 t2a = (double **) q_multi[my_num][k]; 221 t2b = (double **) rhs_multi[my_num][k]; 222 if (color == RED_ITER) { 223 for (i = evenistart; i < iend; i += 2) { 224 t1a = (double *) t2a[i]; 225 t1b = (double *) t2b[i]; 226 t1c = (double *) t2a[i - 1]; 227 t1d = (double *) t2a[i + 1]; 228 for (j = evenjstart; j < jend; j += 2) { 229 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; 230 oldval = t1a[j]; 231 newval = a / factor; 232 newerr = oldval - newval; 233 t1a[j] = newval; 234 if (fabs(newerr) > maxerr) { 235 maxerr = fabs(newerr); 236 } 237 } 238 } 239 for (i = oddistart; i < iend; i += 2) { 240 t1a = (double *) t2a[i]; 241 t1b = (double *) t2b[i]; 242 t1c = (double *) t2a[i - 1]; 243 t1d = (double *) t2a[i + 1]; 244 for (j = oddjstart; j < jend; j += 2) { 245 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; 246 oldval = t1a[j]; 247 newval = a / factor; 248 newerr = oldval - newval; 249 t1a[j] = newval; 250 if (fabs(newerr) > maxerr) { 251 maxerr = fabs(newerr); 252 } 253 } 254 } 255 } else if (color == BLACK_ITER) { 256 for (i = evenistart; i < iend; i += 2) { 257 t1a = (double *) t2a[i]; 258 t1b = (double *) t2b[i]; 259 t1c = (double *) t2a[i - 1]; 260 t1d = (double *) t2a[i + 1]; 261 for (j = oddjstart; j < jend; j += 2) { 262 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; 263 oldval = t1a[j]; 264 newval = a / factor; 265 newerr = oldval - newval; 266 t1a[j] = newval; 267 if (fabs(newerr) > maxerr) { 268 maxerr = fabs(newerr); 269 } 270 } 271 } 272 for (i = oddistart; i < iend; i += 2) { 273 t1a = (double *) t2a[i]; 274 t1b = (double *) t2b[i]; 275 t1c = (double *) t2a[i - 1]; 276 t1d = (double *) t2a[i + 1]; 277 for (j = evenjstart; j < jend; j += 2) { 278 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; 279 oldval = t1a[j]; 280 newval = a / factor; 281 newerr = oldval - newval; 282 t1a[j] = newval; 283 if (fabs(newerr) > maxerr) { 284 maxerr = fabs(newerr); 285 } 286 } 287 } 288 } 289 *err = maxerr; 290 } 291 292 /* perform half-injection to next coarsest level */ 293 void rescal(long kf, long my_num) 294 { 295 long ic; 296 long if17; 297 long jf; 298 long jc; 299 long krc; 300 long istart; 301 long iend; 302 long jstart; 303 long jend; 304 double hf; 305 double s; 306 double s1; 307 double s2; 308 double s3; 309 double s4; 310 double factor; 311 double int1; 312 double int2; 313 double i_int_factor; 314 double j_int_factor; 315 long i_off; 316 long j_off; 317 long up_proc; 318 long left_proc; 319 long im; 320 long jm; 321 double temp; 322 double temp2; 323 double **t2a; 324 double **t2b; 325 double **t2c; 326 double *t1a; 327 double *t1b; 328 double *t1c; 329 double *t1d; 330 double *t1e; 331 double *t1f; 332 double *t1g; 333 double *t1h; 334 335 krc = kf - 1; 336 //hc = lev_res[krc]; 337 hf = lev_res[kf]; 338 i_off = (*gp[my_num].rownum) * ypts_per_proc[krc]; 339 j_off = (*gp[my_num].colnum) * xpts_per_proc[krc]; 340 up_proc = gp[my_num].neighbors[UP]; 341 left_proc = gp[my_num].neighbors[LEFT]; 342 im = (imx[kf] - 2) / yprocs; 343 jm = (jmx[kf] - 2) / xprocs; 344 345 istart = gp[my_num].rlist[krc]; 346 jstart = gp[my_num].rljst[krc]; 347 iend = gp[my_num].rlien[krc] - 1; 348 jend = gp[my_num].rljen[krc] - 1; 349 350 factor = 4.0 - eig2 * hf * hf; 351 352 t2a = (double **) q_multi[my_num][kf]; 353 t2b = (double **) rhs_multi[my_num][kf]; 354 t2c = (double **) rhs_multi[my_num][krc]; 355 if17 = 2 * (istart - 1); 356 for (ic = istart; ic <= iend; ic++) { 357 if17 += 2; 358 i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5; 359 jf = 2 * (jstart - 1); 360 t1a = (double *) t2a[if17]; 361 t1b = (double *) t2b[if17]; 362 t1c = (double *) t2c[ic]; 363 t1d = (double *) t2a[if17 - 1]; 364 t1e = (double *) t2a[if17 + 1]; 365 t1f = (double *) t2a[if17 - 2]; 366 t1g = (double *) t2a[if17 - 3]; 367 t1h = (double *) t2b[if17 - 2]; 368 for (jc = jstart; jc <= jend; jc++) { 369 jf += 2; 370 j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5; 371 372 /* method of half-injection uses 2.0 instead of 4.0 */ 373 374 /* do bilinear interpolation */ 375 s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf]; 376 s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); 377 if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) || ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) { 378 s2 = 0; 379 s3 = 0; 380 s4 = 0; 381 } else if ((if17 == 2) || (jf == 2)) { 382 if (jf == 2) { 383 temp = q_multi[left_proc][kf][if17][jm - 1]; 384 } else { 385 temp = t1a[jf - 3]; 386 } 387 s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2]; 388 s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); 389 if (if17 == 2) { 390 temp = q_multi[up_proc][kf][im - 1][jf]; 391 } else { 392 temp = t1g[jf]; 393 } 394 s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf]; 395 s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); 396 if (jf == 2) { 397 temp = q_multi[left_proc][kf][if17 - 2][jm - 1]; 398 } else { 399 temp = t1f[jf - 3]; 400 } 401 if (if17 == 2) { 402 temp2 = q_multi[up_proc][kf][im - 1][jf - 2]; 403 } else { 404 temp2 = t1g[jf - 2]; 405 } 406 s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2]; 407 s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); 408 } else { 409 s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2]; 410 s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); 411 s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf]; 412 s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); 413 s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2]; 414 s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); 415 } 416 int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3; 417 int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1; 418 //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; 419 t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2; 420 } 421 } 422 } 423 424 /* perform interpolation and addition to next finest grid */ 425 void intadd(long kc, long my_num) 426 { 427 long ic; 428 long if17; 429 long jf; 430 long jc; 431 long kf; 432 long istart; 433 long jstart; 434 long iend; 435 long jend; 436 double int1; 437 double int2; 438 double i_int_factor1; 439 double j_int_factor1; 440 double i_int_factor2; 441 double j_int_factor2; 442 long i_off; 443 long j_off; 444 double **t2a; 445 double **t2b; 446 double *t1a; 447 double *t1b; 448 double *t1c; 449 double *t1d; 450 double *t1e; 451 452 kf = kc + 1; 453 //hc = lev_res[kc]; 454 //hf = lev_res[kf]; 455 456 istart = gp[my_num].rlist[kc]; 457 jstart = gp[my_num].rljst[kc]; 458 iend = gp[my_num].rlien[kc] - 1; 459 jend = gp[my_num].rljen[kc] - 1; 460 i_off = (*gp[my_num].rownum) * ypts_per_proc[kc]; 461 j_off = (*gp[my_num].colnum) * xpts_per_proc[kc]; 462 463 t2a = (double **) q_multi[my_num][kc]; 464 t2b = (double **) q_multi[my_num][kf]; 465 if17 = 2 * (istart - 1); 466 for (ic = istart; ic <= iend; ic++) { 467 if17 += 2; 468 i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]); 469 i_int_factor2 = (ic + i_off) * i_int_coeff[kf]; 470 jf = 2 * (jstart - 1); 471 472 t1a = (double *) t2a[ic]; 473 t1b = (double *) t2a[ic - 1]; 474 t1c = (double *) t2a[ic + 1]; 475 t1d = (double *) t2b[if17]; 476 t1e = (double *) t2b[if17 - 1]; 477 for (jc = jstart; jc <= jend; jc++) { 478 jf += 2; 479 j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]); 480 j_int_factor2 = (jc + j_off) * j_int_coeff[kf]; 481 482 int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc]; 483 int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc]; 484 t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; 485 int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc]; 486 t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; 487 int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc]; 488 int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc]; 489 t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; 490 int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc]; 491 t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; 492 } 493 } 494 } 495 496 /* initialize a grid to zero in parallel */ 497 void putz(long k, long my_num) 498 { 499 long i; 500 long j; 501 long istart; 502 long jstart; 503 long iend; 504 long jend; 505 double **t2a; 506 double *t1a; 507 508 istart = gp[my_num].rlist[k]; 509 jstart = gp[my_num].rljst[k]; 510 iend = gp[my_num].rlien[k]; 511 jend = gp[my_num].rljen[k]; 512 513 t2a = (double **) q_multi[my_num][k]; 514 for (i = istart; i <= iend; i++) { 515 t1a = (double *) t2a[i]; 516 for (j = jstart; j <= jend; j++) { 517 t1a[j] = 0.0; 518 } 519 } 520 } 521 522 void copy_borders(long k, long pid) 523 { 524 long i; 525 long j; 526 long jj; 527 long im; 528 long jm; 529 long lastrow; 530 long lastcol; 531 double **t2a; 532 double **t2b; 533 double *t1a; 534 double *t1b; 535 536 im = (imx[k] - 2) / yprocs + 2; 537 jm = (jmx[k] - 2) / xprocs + 2; 538 lastrow = (imx[k] - 2) / yprocs; 539 lastcol = (jmx[k] - 2) / xprocs; 540 541 t2a = (double **) q_multi[pid][k]; 542 jj = gp[pid].neighbors[UPLEFT]; 543 if (jj != -1) { 544 t2a[0][0] = q_multi[jj][k][im - 2][jm - 2]; 545 } 546 jj = gp[pid].neighbors[UPRIGHT]; 547 if (jj != -1) { 548 t2a[0][jm - 1] = q_multi[jj][k][im - 2][1]; 549 } 550 jj = gp[pid].neighbors[DOWNLEFT]; 551 if (jj != -1) { 552 t2a[im - 1][0] = q_multi[jj][k][1][jm - 2]; 553 } 554 jj = gp[pid].neighbors[DOWNRIGHT]; 555 if (jj != -1) { 556 t2a[im - 1][jm - 1] = q_multi[jj][k][1][1]; 557 } 558 559 if (gp[pid].neighbors[UP] == -1) { 560 jj = gp[pid].neighbors[LEFT]; 561 if (jj != -1) { 562 t2a[0][0] = q_multi[jj][k][0][jm - 2]; 563 } else { 564 jj = gp[pid].neighbors[DOWN]; 565 if (jj != -1) { 566 t2a[im - 1][0] = q_multi[jj][k][1][0]; 567 } 568 } 569 jj = gp[pid].neighbors[RIGHT]; 570 if (jj != -1) { 571 t2a[0][jm - 1] = q_multi[jj][k][0][1]; 572 } else { 573 jj = gp[pid].neighbors[DOWN]; 574 if (jj != -1) { 575 t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; 576 } 577 } 578 } else if (gp[pid].neighbors[DOWN] == -1) { 579 jj = gp[pid].neighbors[LEFT]; 580 if (jj != -1) { 581 t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2]; 582 } else { 583 jj = gp[pid].neighbors[UP]; 584 if (jj != -1) { 585 t2a[0][0] = q_multi[jj][k][im - 2][0]; 586 } 587 } 588 jj = gp[pid].neighbors[RIGHT]; 589 if (jj != -1) { 590 t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1]; 591 } else { 592 jj = gp[pid].neighbors[UP]; 593 if (jj != -1) { 594 t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; 595 } 596 } 597 } else if (gp[pid].neighbors[LEFT] == -1) { 598 jj = gp[pid].neighbors[UP]; 599 if (jj != -1) { 600 t2a[0][0] = q_multi[jj][k][im - 2][0]; 601 } 602 jj = gp[pid].neighbors[DOWN]; 603 if (jj != -1) { 604 t2a[im - 1][0] = q_multi[jj][k][1][0]; 605 } 606 } else if (gp[pid].neighbors[RIGHT] == -1) { 607 jj = gp[pid].neighbors[UP]; 608 if (jj != -1) { 609 t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; 610 } 611 jj = gp[pid].neighbors[DOWN]; 612 if (jj != -1) { 613 t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; 614 } 615 } 616 617 j = gp[pid].neighbors[UP]; 618 if (j != -1) { 619 t1a = (double *) t2a[0]; 620 t1b = (double *) q_multi[j][k][im - 2]; 621 for (i = 1; i <= lastcol; i++) { 622 t1a[i] = t1b[i]; 623 } 624 } 625 j = gp[pid].neighbors[DOWN]; 626 if (j != -1) { 627 t1a = (double *) t2a[im - 1]; 628 t1b = (double *) q_multi[j][k][1]; 629 for (i = 1; i <= lastcol; i++) { 630 t1a[i] = t1b[i]; 631 } 632 } 633 j = gp[pid].neighbors[LEFT]; 634 if (j != -1) { 635 t2b = (double **) q_multi[j][k]; 636 for (i = 1; i <= lastrow; i++) { 637 t2a[i][0] = t2b[i][jm - 2]; 638 } 639 } 640 j = gp[pid].neighbors[RIGHT]; 641 if (j != -1) { 642 t2b = (double **) q_multi[j][k]; 643 for (i = 1; i <= lastrow; i++) { 644 t2a[i][jm - 1] = t2b[i][1]; 645 } 646 } 647 648 } 649 650 void copy_rhs_borders(long k, long procid) 651 { 652 long i; 653 long j; 654 long im; 655 long jm; 656 long lastrow; 657 long lastcol; 658 double **t2a; 659 double **t2b; 660 double *t1a; 661 double *t1b; 662 663 im = (imx[k] - 2) / yprocs + 2; 664 jm = (jmx[k] - 2) / xprocs + 2; 665 lastrow = (imx[k] - 2) / yprocs; 666 lastcol = (jmx[k] - 2) / xprocs; 667 668 t2a = (double **) rhs_multi[procid][k]; 669 if (gp[procid].neighbors[UPLEFT] != -1) { 670 j = gp[procid].neighbors[UPLEFT]; 671 t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2]; 672 } 673 674 if (gp[procid].neighbors[UP] != -1) { 675 j = gp[procid].neighbors[UP]; 676 if (j != -1) { 677 t1a = (double *) t2a[0]; 678 t1b = (double *) rhs_multi[j][k][im - 2]; 679 for (i = 2; i <= lastcol; i += 2) { 680 t1a[i] = t1b[i]; 681 } 682 } 683 } 684 if (gp[procid].neighbors[LEFT] != -1) { 685 j = gp[procid].neighbors[LEFT]; 686 if (j != -1) { 687 t2b = (double **) rhs_multi[j][k]; 688 for (i = 2; i <= lastrow; i += 2) { 689 t2a[i][0] = t2b[i][jm - 2]; 690 } 691 } 692 } 693 } 694 695 void copy_red(long k, long procid) 696 { 697 long i; 698 long j; 699 long im; 700 long jm; 701 long lastrow; 702 long lastcol; 703 double **t2a; 704 double **t2b; 705 double *t1a; 706 double *t1b; 707 708 im = (imx[k] - 2) / yprocs + 2; 709 jm = (jmx[k] - 2) / xprocs + 2; 710 lastrow = (imx[k] - 2) / yprocs; 711 lastcol = (jmx[k] - 2) / xprocs; 712 713 t2a = (double **) q_multi[procid][k]; 714 j = gp[procid].neighbors[UP]; 715 if (j != -1) { 716 t1a = (double *) t2a[0]; 717 t1b = (double *) q_multi[j][k][im - 2]; 718 for (i = 2; i <= lastcol; i += 2) { 719 t1a[i] = t1b[i]; 720 } 721 } 722 j = gp[procid].neighbors[DOWN]; 723 if (j != -1) { 724 t1a = (double *) t2a[im - 1]; 725 t1b = (double *) q_multi[j][k][1]; 726 for (i = 1; i <= lastcol; i += 2) { 727 t1a[i] = t1b[i]; 728 } 729 } 730 j = gp[procid].neighbors[LEFT]; 731 if (j != -1) { 732 t2b = (double **) q_multi[j][k]; 733 for (i = 2; i <= lastrow; i += 2) { 734 t2a[i][0] = t2b[i][jm - 2]; 735 } 736 } 737 j = gp[procid].neighbors[RIGHT]; 738 if (j != -1) { 739 t2b = (double **) q_multi[j][k]; 740 for (i = 1; i <= lastrow; i += 2) { 741 t2a[i][jm - 1] = t2b[i][1]; 742 } 743 } 744 } 745 746 void copy_black(long k, long procid) 747 { 748 long i; 749 long j; 750 long im; 751 long jm; 752 long lastrow; 753 long lastcol; 754 double **t2a; 755 double **t2b; 756 double *t1a; 757 double *t1b; 758 759 im = (imx[k] - 2) / yprocs + 2; 760 jm = (jmx[k] - 2) / xprocs + 2; 761 lastrow = (imx[k] - 2) / yprocs; 762 lastcol = (jmx[k] - 2) / xprocs; 763 764 t2a = (double **) q_multi[procid][k]; 765 j = gp[procid].neighbors[UP]; 766 if (j != -1) { 767 t1a = (double *) t2a[0]; 768 t1b = (double *) q_multi[j][k][im - 2]; 769 for (i = 1; i <= lastcol; i += 2) { 770 t1a[i] = t1b[i]; 771 } 772 } 773 j = gp[procid].neighbors[DOWN]; 774 if (j != -1) { 775 t1a = (double *) t2a[im - 1]; 776 t1b = (double *) q_multi[j][k][1]; 777 for (i = 2; i <= lastcol; i += 2) { 778 t1a[i] = t1b[i]; 779 } 780 } 781 j = gp[procid].neighbors[LEFT]; 782 if (j != -1) { 783 t2b = (double **) q_multi[j][k]; 784 for (i = 1; i <= lastrow; i += 2) { 785 t2a[i][0] = t2b[i][jm - 2]; 786 } 787 } 788 j = gp[procid].neighbors[RIGHT]; 789 if (j != -1) { 790 t2b = (double **) q_multi[j][k]; 791 for (i = 2; i <= lastrow; i += 2) { 792 t2a[i][jm - 1] = t2b[i][1]; 793 } 794 } 795 }
Note: See TracChangeset
for help on using the changeset viewer.