Changeset 598 for soft/giet_vm/applications/ocean/slave2.C
- Timestamp:
- Jul 9, 2015, 2:11:17 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/ocean/slave2.C
r589 r598 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 2 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 16 17 /* **************** 18 subroutine slave2 19 **************** */ 20 21 EXTERN_ENV 22 23 #include <stdio.h> 24 #include <math.h> 25 #include <stdlib.h> 26 27 #include "decs.h" 28 29 void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols) 30 { 31 long i; 32 long j; 33 long iindex; 34 double hh1; 35 double hh3; 36 double hinv; 37 double h1inv; 38 long istart; 39 long iend; 40 long jstart; 41 long jend; 42 long ist; 43 long ien; 44 long jst; 45 long jen; 46 double ressqr; 47 double psiaipriv; 48 double f4; 49 double timst; 50 long psiindex; 51 long i_off; 52 long j_off; 53 long multi_start; 54 long multi_end; 55 double **t2a; 56 double **t2b; 57 double **t2c; 58 double **t2d; 59 double **t2e; 60 double **t2f; 61 double **t2g; 62 double **t2h; 63 double *t1a; 64 double *t1b; 65 double *t1c; 66 double *t1d; 67 double *t1e; 68 double *t1f; 69 double *t1g; 70 double *t1h; 71 72 ressqr = lev_res[numlev - 1] * lev_res[numlev - 1]; 73 i_off = (*gp[procid].rownum) * numrows; 74 j_off = (*gp[procid].colnum) * numcols; 75 76 START_PHASE(procid, 1); 77 78 /* *************************************************************** 79 80 f i r s t p h a s e (of timestep calculation) 81 82 ***************************************************************/ 83 84 t2a = (double **) ga[procid]; 85 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 86 t2a[0][0] = 0.0; 87 } 88 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 89 t2a[im - 1][0] = 0.0; 90 } 91 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 92 t2a[0][jm - 1] = 0.0; 93 } 94 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 95 t2a[im - 1][jm - 1] = 0.0; 96 } 97 if (gp[procid].neighbors[UP] == -1) { 98 t1a = (double *) t2a[0]; 99 for (j = firstcol; j <= lastcol; j++) { 100 t1a[j] = 0.0; 101 } 102 } 103 if (gp[procid].neighbors[DOWN] == -1) { 104 t1a = (double *) t2a[im - 1]; 105 for (j = firstcol; j <= lastcol; j++) { 106 t1a[j] = 0.0; 107 } 108 } 109 if (gp[procid].neighbors[LEFT] == -1) { 110 for (j = firstrow; j <= lastrow; j++) { 111 t2a[j][0] = 0.0; 112 } 113 } 114 if (gp[procid].neighbors[RIGHT] == -1) { 115 for (j = firstrow; j <= lastrow; j++) { 116 t2a[j][jm - 1] = 0.0; 117 } 118 } 119 for (i = firstrow; i <= lastrow; i++) { 120 t1a = (double *) t2a[i]; 121 for (iindex = firstcol; iindex <= lastcol; iindex++) { 122 t1a[iindex] = 0.0; 123 } 124 } 125 126 t2a = (double **) gb[procid]; 127 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 128 t2a[0][0] = 0.0; 129 } 130 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 131 t2a[im - 1][0] = 0.0; 132 } 133 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 134 t2a[0][jm - 1] = 0.0; 135 } 136 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 137 t2a[im - 1][jm - 1] = 0.0; 138 } 139 if (gp[procid].neighbors[UP] == -1) { 140 t1a = (double *) t2a[0]; 141 for (j = firstcol; j <= lastcol; j++) { 142 t1a[j] = 0.0; 143 } 144 } 145 if (gp[procid].neighbors[DOWN] == -1) { 146 t1a = (double *) t2a[im - 1]; 147 for (j = firstcol; j <= lastcol; j++) { 148 t1a[j] = 0.0; 149 } 150 } 151 if (gp[procid].neighbors[LEFT] == -1) { 152 for (j = firstrow; j <= lastrow; j++) { 153 t2a[j][0] = 0.0; 154 } 155 } 156 if (gp[procid].neighbors[RIGHT] == -1) { 157 for (j = firstrow; j <= lastrow; j++) { 158 t2a[j][jm - 1] = 0.0; 159 } 160 } 161 for (i = firstrow; i <= lastrow; i++) { 162 t1a = (double *) t2a[i]; 163 for (iindex = firstcol; iindex <= lastcol; iindex++) { 164 t1a[iindex] = 0.0; 165 } 166 } 167 168 /* put the laplacian of psi{1,3} in work1{1,2} 169 note that psi(i,j,2) represents the psi3 array in 170 the original equations */ 171 172 for (psiindex = 0; psiindex <= 1; psiindex++) { 173 t2a = (double **) work1[procid][psiindex]; 174 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 175 t2a[0][0] = 0; 176 } 177 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 178 t2a[im - 1][0] = 0; 179 } 180 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 181 t2a[0][jm - 1] = 0; 182 } 183 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 184 t2a[im - 1][jm - 1] = 0; 185 } 186 laplacalc(procid, psi, work1, psiindex, firstrow, lastrow, firstcol, lastcol); 187 } 188 189 /* set values of work2 array to psi1 - psi3 */ 190 191 t2a = (double **) work2[procid]; 192 t2b = (double **) psi[procid][0]; 193 t2c = (double **) psi[procid][1]; 194 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 195 t2a[0][0] = t2b[0][0] - t2c[0][0]; 196 } 197 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 198 t2a[im - 1][0] = t2b[im - 1][0] - t2c[im - 1][0]; 199 } 200 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 201 t2a[0][jm - 1] = t2b[0][jm - 1] - t2c[0][jm - 1]; 202 } 203 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 204 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1] - t2c[im - 1][jm - 1]; 205 } 206 if (gp[procid].neighbors[UP] == -1) { 207 t1a = (double *) t2a[0]; 208 t1b = (double *) t2b[0]; 209 t1c = (double *) t2c[0]; 210 for (j = firstcol; j <= lastcol; j++) { 211 t1a[j] = t1b[j] - t1c[j]; 212 } 213 } 214 if (gp[procid].neighbors[DOWN] == -1) { 215 t1a = (double *) t2a[im - 1]; 216 t1b = (double *) t2b[im - 1]; 217 t1c = (double *) t2c[im - 1]; 218 for (j = firstcol; j <= lastcol; j++) { 219 t1a[j] = t1b[j] - t1c[j]; 220 } 221 } 222 if (gp[procid].neighbors[LEFT] == -1) { 223 for (j = firstrow; j <= lastrow; j++) { 224 t2a[j][0] = t2b[j][0] - t2c[j][0]; 225 } 226 } 227 if (gp[procid].neighbors[RIGHT] == -1) { 228 for (j = firstrow; j <= lastrow; j++) { 229 t2a[j][jm - 1] = t2b[j][jm - 1] - t2c[j][jm - 1]; 230 } 231 } 232 for (i = firstrow; i <= lastrow; i++) { 233 t1a = (double *) t2a[i]; 234 t1b = (double *) t2b[i]; 235 t1c = (double *) t2c[i]; 236 for (iindex = firstcol; iindex <= lastcol; iindex++) { 237 t1a[iindex] = t1b[iindex] - t1c[iindex]; 238 } 239 } 240 241 /* set values of work3 array to h3/h * psi1 + h1/h * psi3 */ 242 243 t2a = (double **) work3[procid]; 244 hh3 = h3 / h; 245 hh1 = h1 / h; 246 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 247 t2a[0][0] = hh3 * t2a[0][0] + hh1 * t2c[0][0]; 248 } 249 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 250 t2a[im - 1][0] = hh3 * t2a[im - 1][0] + hh1 * t2c[im - 1][0]; 251 } 252 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 253 t2a[0][jm - 1] = hh3 * t2a[0][jm - 1] + hh1 * t2c[0][jm - 1]; 254 } 255 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 256 t2a[im - 1][jm - 1] = hh3 * t2a[im - 1][jm - 1] + hh1 * t2c[im - 1][jm - 1]; 257 } 258 if (gp[procid].neighbors[UP] == -1) { 259 for (j = firstcol; j <= lastcol; j++) { 260 t2a[0][j] = hh3 * t2a[0][j] + hh1 * t2c[0][j]; 261 } 262 } 263 if (gp[procid].neighbors[DOWN] == -1) { 264 for (j = firstcol; j <= lastcol; j++) { 265 t2a[im - 1][j] = hh3 * t2a[im - 1][j] + hh1 * t2c[im - 1][j]; 266 } 267 } 268 if (gp[procid].neighbors[LEFT] == -1) { 269 for (j = firstrow; j <= lastrow; j++) { 270 t2a[j][0] = hh3 * t2a[j][0] + hh1 * t2c[j][0]; 271 } 272 } 273 if (gp[procid].neighbors[RIGHT] == -1) { 274 for (j = firstrow; j <= lastrow; j++) { 275 t2a[j][jm - 1] = hh3 * t2a[j][jm - 1] + hh1 * t2c[j][jm - 1]; 276 } 277 } 278 for (i = firstrow; i <= lastrow; i++) { 279 t1a = (double *) t2a[i]; 280 t1c = (double *) t2c[i]; 281 for (iindex = firstcol; iindex <= lastcol; iindex++) { 282 t1a[iindex] = hh3 * t1a[iindex] + hh1 * t1c[iindex]; 283 } 284 } 285 286 /* set values of temparray{1,3} to psi{1,3} */ 287 288 for (psiindex = 0; psiindex <= 1; psiindex++) { 289 t2a = (double **) temparray[procid][psiindex]; 290 t2b = (double **) psi[procid][psiindex]; 291 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 292 t2a[0][0] = t2b[0][0]; 293 } 294 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 295 t2a[im - 1][0] = t2b[im - 1][0]; 296 } 297 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 298 t2a[0][jm - 1] = t2b[0][jm - 1]; 299 } 300 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 301 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1]; 302 } 303 if (gp[procid].neighbors[UP] == -1) { 304 for (j = firstcol; j <= lastcol; j++) { 305 t2a[0][j] = t2b[0][j]; 306 } 307 } 308 if (gp[procid].neighbors[DOWN] == -1) { 309 for (j = firstcol; j <= lastcol; j++) { 310 t2a[im - 1][j] = t2b[im - 1][j]; 311 } 312 } 313 if (gp[procid].neighbors[LEFT] == -1) { 314 for (j = firstrow; j <= lastrow; j++) { 315 t2a[j][0] = t2b[j][0]; 316 } 317 } 318 if (gp[procid].neighbors[RIGHT] == -1) { 319 for (j = firstrow; j <= lastrow; j++) { 320 t2a[j][jm - 1] = t2b[j][jm - 1]; 321 } 322 } 323 324 for (i = firstrow; i <= lastrow; i++) { 325 t1a = (double *) t2a[i]; 326 t1b = (double *) t2b[i]; 327 for (iindex = firstcol; iindex <= lastcol; iindex++) { 328 t1a[iindex] = t1b[iindex]; 329 } 330 } 331 } 332 333 END_PHASE(procid, 1); 334 335 #if defined(MULTIPLE_BARRIERS) 336 BARRIER(bars->sl_phase_1, nprocs) 337 #else 338 BARRIER(bars->barrier, nprocs) 339 #endif 340 341 /* ******************************************************* 342 343 s e c o n d p h a s e 344 345 ******************************************************* 346 347 set values of psi{1,3} to psim{1,3} */ 348 349 START_PHASE(procid, 2); 350 351 for (psiindex = 0; psiindex <= 1; psiindex++) { 352 t2a = (double **) psi[procid][psiindex]; 353 t2b = (double **) psim[procid][psiindex]; 354 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 355 t2a[0][0] = t2b[0][0]; 356 } 357 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 358 t2a[0][jm - 1] = t2b[0][jm - 1]; 359 } 360 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 361 t2a[im - 1][0] = t2b[im - 1][0]; 362 } 363 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 364 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1]; 365 } 366 if (gp[procid].neighbors[UP] == -1) { 367 for (j = firstcol; j <= lastcol; j++) { 368 t2a[0][j] = t2b[0][j]; 369 } 370 } 371 if (gp[procid].neighbors[DOWN] == -1) { 372 for (j = firstcol; j <= lastcol; j++) { 373 t2a[im - 1][j] = t2b[im - 1][j]; 374 } 375 } 376 if (gp[procid].neighbors[LEFT] == -1) { 377 for (j = firstrow; j <= lastrow; j++) { 378 t2a[j][0] = t2b[j][0]; 379 } 380 } 381 if (gp[procid].neighbors[RIGHT] == -1) { 382 for (j = firstrow; j <= lastrow; j++) { 383 t2a[j][jm - 1] = t2b[j][jm - 1]; 384 } 385 } 386 387 for (i = firstrow; i <= lastrow; i++) { 388 t1a = (double *) t2a[i]; 389 t1b = (double *) t2b[i]; 390 for (iindex = firstcol; iindex <= lastcol; iindex++) { 391 t1a[iindex] = t1b[iindex]; 392 } 393 } 394 } 395 396 /* put the laplacian of the psim array 397 into the work7 array; first part of a three-laplacian 398 calculation to compute the friction terms */ 399 400 for (psiindex = 0; psiindex <= 1; psiindex++) { 401 t2a = (double **) work7[procid][psiindex]; 402 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 403 t2a[0][0] = 0; 404 } 405 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 406 t2a[im - 1][0] = 0; 407 } 408 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 409 t2a[0][jm - 1] = 0; 410 } 411 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 412 t2a[im - 1][jm - 1] = 0; 413 } 414 laplacalc(procid, psim, work7, psiindex, firstrow, lastrow, firstcol, lastcol); 415 } 416 417 /* to the values of the work1{1,2} arrays obtained from the 418 laplacians of psi{1,2} in the previous phase, add to the 419 elements of every column the corresponding value in the 420 one-dimenional f array */ 421 422 for (psiindex = 0; psiindex <= 1; psiindex++) { 423 t2a = (double **) work1[procid][psiindex]; 424 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 425 t2a[0][0] = t2a[0][0] + f[0]; 426 } 427 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 428 t2a[im - 1][0] = t2a[im - 1][0] + f[0]; 429 } 430 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 431 t2a[0][jm - 1] = t2a[0][jm - 1] + f[jmx[numlev - 1] - 1]; 432 } 433 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 434 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f[jmx[numlev - 1] - 1]; 435 } 436 if (gp[procid].neighbors[UP] == -1) { 437 for (j = firstcol; j <= lastcol; j++) { 438 t2a[0][j] = t2a[0][j] + f[j + j_off]; 439 } 440 } 441 if (gp[procid].neighbors[DOWN] == -1) { 442 for (j = firstcol; j <= lastcol; j++) { 443 t2a[im - 1][j] = t2a[im - 1][j] + f[j + j_off]; 444 } 445 } 446 if (gp[procid].neighbors[LEFT] == -1) { 447 for (j = firstrow; j <= lastrow; j++) { 448 t2a[j][0] = t2a[j][0] + f[j + i_off]; 449 } 450 } 451 if (gp[procid].neighbors[RIGHT] == -1) { 452 for (j = firstrow; j <= lastrow; j++) { 453 t2a[j][jm - 1] = t2a[j][jm - 1] + f[j + i_off]; 454 } 455 } 456 for (i = firstrow; i <= lastrow; i++) { 457 t1a = (double *) t2a[i]; 458 for (iindex = firstcol; iindex <= lastcol; iindex++) { 459 t1a[iindex] = t1a[iindex] + f[iindex + j_off]; 460 } 461 } 462 } 463 464 END_PHASE(procid, 2); 465 466 #if defined(MULTIPLE_BARRIERS) 467 BARRIER(bars->sl_phase_2, nprocs) 468 #else 469 BARRIER(bars->barrier, nprocs) 470 #endif 471 /* ******************************************************* 472 473 t h i r d p h a s e 474 475 ******************************************************* 476 477 put the jacobian of the work1{1,2} and psi{1,3} arrays 478 (the latter currently in temparray) in the work5{1,2} arrays */ 479 480 START_PHASE(procid, 3); 481 482 for (psiindex = 0; psiindex <= 1; psiindex++) { 483 jacobcalc2(work1, temparray, work5, psiindex, procid, firstrow, lastrow, firstcol, lastcol); 484 } 485 486 /* set values of psim{1,3} to temparray{1,3} */ 487 488 for (psiindex = 0; psiindex <= 1; psiindex++) { 489 t2a = (double **) psim[procid][psiindex]; 490 t2b = (double **) temparray[procid][psiindex]; 491 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 492 t2a[0][0] = t2b[0][0]; 493 } 494 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 495 t2a[im - 1][0] = t2b[im - 1][0]; 496 } 497 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 498 t2a[0][jm - 1] = t2b[0][jm - 1]; 499 } 500 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 501 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1]; 502 } 503 if (gp[procid].neighbors[UP] == -1) { 504 t1a = (double *) t2a[0]; 505 t1b = (double *) t2b[0]; 506 for (j = firstcol; j <= lastcol; j++) { 507 t1a[j] = t1b[j]; 508 } 509 } 510 if (gp[procid].neighbors[DOWN] == -1) { 511 t1a = (double *) t2a[im - 1]; 512 t1b = (double *) t2b[im - 1]; 513 for (j = firstcol; j <= lastcol; j++) { 514 t1a[j] = t1b[j]; 515 } 516 } 517 if (gp[procid].neighbors[LEFT] == -1) { 518 for (j = firstrow; j <= lastrow; j++) { 519 t2a[j][0] = t2b[j][0]; 520 } 521 } 522 if (gp[procid].neighbors[RIGHT] == -1) { 523 for (j = firstrow; j <= lastrow; j++) { 524 t2a[j][jm - 1] = t2b[j][jm - 1]; 525 } 526 } 527 for (i = firstrow; i <= lastrow; i++) { 528 t1a = (double *) t2a[i]; 529 t1b = (double *) t2b[i]; 530 for (iindex = firstcol; iindex <= lastcol; iindex++) { 531 t1a[iindex] = t1b[iindex]; 532 } 533 } 534 } 535 536 /* put the laplacian of the work7{1,2} arrays in the work4{1,2} 537 arrays; second step in the three-laplacian friction calculation */ 538 539 for (psiindex = 0; psiindex <= 1; psiindex++) { 540 laplacalc(procid, work7, work4, psiindex, firstrow, lastrow, firstcol, lastcol); 541 } 542 543 END_PHASE(procid, 3); 544 545 #if defined(MULTIPLE_BARRIERS) 546 BARRIER(bars->sl_phase_3, nprocs) 547 #else 548 BARRIER(bars->barrier, nprocs) 549 #endif 550 551 /* ******************************************************* 552 553 f o u r t h p h a s e 554 555 ******************************************************* 556 557 put the jacobian of the work2 and work3 arrays in the work6 558 array */ 559 560 START_PHASE(procid, 4); 561 562 jacobcalc(work2, work3, work6, procid, firstrow, lastrow, firstcol, lastcol); 563 564 /* put the laplacian of the work4{1,2} arrays in the work7{1,2} 565 arrays; third step in the three-laplacian friction calculation */ 566 567 for (psiindex = 0; psiindex <= 1; psiindex++) { 568 laplacalc(procid, work4, work7, psiindex, firstrow, lastrow, firstcol, lastcol); 569 } 570 571 END_PHASE(procid, 4); 572 573 #if defined(MULTIPLE_BARRIERS) 574 BARRIER(bars->sl_phase_4, nprocs) 575 #else 576 BARRIER(bars->barrier, nprocs) 577 #endif 578 579 /* ******************************************************* 580 581 f i f t h p h a s e 582 583 ******************************************************* 584 585 use the values of the work5, work6 and work7 arrays 586 computed in the previous time-steps to compute the 587 ga and gb arrays */ 588 589 START_PHASE(procid, 5); 590 591 hinv = 1.0 / h; 592 h1inv = 1.0 / h1; 593 594 t2a = (double **) ga[procid]; 595 t2b = (double **) gb[procid]; 596 t2c = (double **) work5[procid][0]; 597 t2d = (double **) work5[procid][1]; 598 t2e = (double **) work7[procid][0]; 599 t2f = (double **) work7[procid][1]; 600 t2g = (double **) work6[procid]; 601 t2h = (double **) tauz[procid]; 602 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 603 t2a[0][0] = t2c[0][0] - t2d[0][0] + eig2 * t2g[0][0] + h1inv * t2h[0][0] + lf * t2e[0][0] - lf * t2f[0][0]; 604 t2b[0][0] = hh1 * t2c[0][0] + hh3 * t2d[0][0] + hinv * t2h[0][0] + lf * hh1 * t2e[0][0] + lf * hh3 * t2f[0][0]; 605 } 606 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 607 t2a[im - 1][0] = t2c[im - 1][0] - t2d[im - 1][0] + eig2 * t2g[im - 1][0] + h1inv * t2h[im - 1][0] + lf * t2e[im - 1][0] - lf * t2f[im - 1][0]; 608 t2b[im - 1][0] = hh1 * t2c[im - 1][0] + hh3 * t2d[im - 1][0] + hinv * t2h[im - 1][0] + lf * hh1 * t2e[im - 1][0] + lf * hh3 * t2f[im - 1][0]; 609 } 610 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 611 t2a[0][jm - 1] = t2c[0][jm - 1] - t2d[0][jm - 1] + eig2 * t2g[0][jm - 1] + h1inv * t2h[0][jm - 1] + lf * t2e[0][jm - 1] - lf * t2f[0][jm - 1]; 612 t2b[0][jm - 1] = hh1 * t2c[0][jm - 1] + hh3 * t2d[0][jm - 1] + hinv * t2h[0][jm - 1] + lf * hh1 * t2e[0][jm - 1] + lf * hh3 * t2f[0][jm - 1]; 613 } 614 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 615 t2a[im - 1][jm - 1] = t2c[im - 1][jm - 1] - t2d[im - 1][jm - 1] + eig2 * t2g[im - 1][jm - 1] + h1inv * t2h[im - 1][jm - 1] + lf * t2e[im - 1][jm - 1] - lf * t2f[im - 1][jm - 1]; 616 t2b[im - 1][jm - 1] = hh1 * t2c[im - 1][jm - 1] + hh3 * t2d[im - 1][jm - 1] + hinv * t2h[im - 1][jm - 1] + lf * hh1 * t2e[im - 1][jm - 1] + lf * hh3 * t2f[im - 1][jm - 1]; 617 } 618 if (gp[procid].neighbors[UP] == -1) { 619 t1a = (double *) t2a[0]; 620 t1b = (double *) t2b[0]; 621 t1c = (double *) t2c[0]; 622 t1d = (double *) t2d[0]; 623 t1e = (double *) t2e[0]; 624 t1f = (double *) t2f[0]; 625 t1g = (double *) t2g[0]; 626 t1h = (double *) t2h[0]; 627 for (j = firstcol; j <= lastcol; j++) { 628 t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j]; 629 t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j]; 630 } 631 } 632 if (gp[procid].neighbors[DOWN] == -1) { 633 t1a = (double *) t2a[im - 1]; 634 t1b = (double *) t2b[im - 1]; 635 t1c = (double *) t2c[im - 1]; 636 t1d = (double *) t2d[im - 1]; 637 t1e = (double *) t2e[im - 1]; 638 t1f = (double *) t2f[im - 1]; 639 t1g = (double *) t2g[im - 1]; 640 t1h = (double *) t2h[im - 1]; 641 for (j = firstcol; j <= lastcol; j++) { 642 t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j]; 643 t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j]; 644 } 645 } 646 if (gp[procid].neighbors[LEFT] == -1) { 647 for (j = firstrow; j <= lastrow; j++) { 648 t2a[j][0] = t2c[j][0] - t2d[j][0] + eig2 * t2g[j][0] + h1inv * t2h[j][0] + lf * t2e[j][0] - lf * t2f[j][0]; 649 t2b[j][0] = hh1 * t2c[j][0] + hh3 * t2d[j][0] + hinv * t2h[j][0] + lf * hh1 * t2e[j][0] + lf * hh3 * t2f[j][0]; 650 } 651 } 652 if (gp[procid].neighbors[RIGHT] == -1) { 653 for (j = firstrow; j <= lastrow; j++) { 654 t2a[j][jm - 1] = t2c[j][jm - 1] - t2d[j][jm - 1] + eig2 * t2g[j][jm - 1] + h1inv * t2h[j][jm - 1] + lf * t2e[j][jm - 1] - lf * t2f[j][jm - 1]; 655 t2b[j][jm - 1] = hh1 * t2c[j][jm - 1] + hh3 * t2d[j][jm - 1] + hinv * t2h[j][jm - 1] + lf * hh1 * t2e[j][jm - 1] + lf * hh3 * t2f[j][jm - 1]; 656 } 657 } 658 659 for (i = firstrow; i <= lastrow; i++) { 660 t1a = (double *) t2a[i]; 661 t1b = (double *) t2b[i]; 662 t1c = (double *) t2c[i]; 663 t1d = (double *) t2d[i]; 664 t1e = (double *) t2e[i]; 665 t1f = (double *) t2f[i]; 666 t1g = (double *) t2g[i]; 667 t1h = (double *) t2h[i]; 668 for (iindex = firstcol; iindex <= lastcol; iindex++) { 669 t1a[iindex] = t1c[iindex] - t1d[iindex] + eig2 * t1g[iindex] + h1inv * t1h[iindex] + lf * t1e[iindex] - lf * t1f[iindex]; 670 t1b[iindex] = hh1 * t1c[iindex] + hh3 * t1d[iindex] + hinv * t1h[iindex] + lf * hh1 * t1e[iindex] + lf * hh3 * t1f[iindex]; 671 } 672 } 673 674 END_PHASE(procid, 5); 675 676 #if defined(MULTIPLE_BARRIERS) 677 BARRIER(bars->sl_phase_5, nprocs) 678 #else 679 BARRIER(bars->barrier, nprocs) 680 #endif 681 682 /* ******************************************************* 683 684 s i x t h p h a s e 685 686 ******************************************************* */ 687 688 START_PHASE(procid, 6); 689 690 istart = 1; 691 iend = istart + gp[procid].rel_num_y[numlev - 1] - 1; 692 jstart = 1; 693 jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1; 694 ist = istart; 695 ien = iend; 696 jst = jstart; 697 jen = jend; 698 699 if (gp[procid].neighbors[UP] == -1) { 700 istart = 0; 701 } 702 if (gp[procid].neighbors[LEFT] == -1) { 703 jstart = 0; 704 } 705 if (gp[procid].neighbors[DOWN] == -1) { 706 iend = im - 1; 707 } 708 if (gp[procid].neighbors[RIGHT] == -1) { 709 jend = jm - 1; 710 } 711 t2a = (double **) rhs_multi[procid][numlev - 1]; 712 t2b = (double **) ga[procid]; 713 t2c = (double **) oldga[procid]; 714 t2d = (double **) q_multi[procid][numlev - 1]; 715 for (i = istart; i <= iend; i++) { 716 t1a = (double *) t2a[i]; 717 t1b = (double *) t2b[i]; 718 for (j = jstart; j <= jend; j++) { 719 t1a[j] = t1b[j] * ressqr; 720 } 721 } 722 723 if (gp[procid].neighbors[UP] == -1) { 724 t1d = (double *) t2d[0]; 725 t1b = (double *) t2b[0]; 726 for (j = jstart; j <= jend; j++) { 727 t1d[j] = t1b[j]; 728 } 729 } 730 if (gp[procid].neighbors[DOWN] == -1) { 731 t1d = (double *) t2d[im - 1]; 732 t1b = (double *) t2b[im - 1]; 733 for (j = jstart; j <= jend; j++) { 734 t1d[j] = t1b[j]; 735 } 736 } 737 if (gp[procid].neighbors[LEFT] == -1) { 738 for (i = istart; i <= iend; i++) { 739 t2d[i][0] = t2b[i][0]; 740 } 741 } 742 if (gp[procid].neighbors[RIGHT] == -1) { 743 for (i = istart; i <= iend; i++) { 744 t2d[i][jm - 1] = t2b[i][jm - 1]; 745 } 746 } 747 //fac = 1.0 / (4.0 - ressqr*eig2); 748 for (i = ist; i <= ien; i++) { 749 t1d = (double *) t2d[i]; 750 t1c = (double *) t2c[i]; 751 for (j = jst; j <= jen; j++) { 752 t1d[j] = t1c[j]; 753 } 754 } 755 756 if ((procid == MASTER) || (do_stats)) { 757 CLOCK(multi_start); 758 } 759 760 multig(procid); 761 762 if ((procid == MASTER) || (do_stats)) { 763 CLOCK(multi_end); 764 (*gp[procid].multi_time) += (multi_end - multi_start); 765 } 766 767 /* the shared sum variable psiai is initialized to 0 at 768 every time-step */ 769 770 if (procid == MASTER) { 771 global->psiai = 0.0; 772 } 773 774 /* copy the solution for use as initial guess in next time-step */ 775 776 for (i = istart; i <= iend; i++) { 777 t1b = (double *) t2b[i]; 778 t1c = (double *) t2c[i]; 779 t1d = (double *) t2d[i]; 780 for (j = jstart; j <= jend; j++) { 781 t1b[j] = t1d[j]; 782 t1c[j] = t1d[j]; 783 } 784 } 785 786 END_PHASE(procid, 6); 787 788 #if defined(MULTIPLE_BARRIERS) 789 BARRIER(bars->sl_phase_6, nprocs) 790 #else 791 BARRIER(bars->barrier, nprocs) 792 #endif 793 794 /* ******************************************************* 795 796 s e v e n t h p h a s e 797 798 ******************************************************* 799 800 every process computes the running sum for its assigned portion 801 in a private variable psiaipriv */ 802 803 START_PHASE(procid, 7); 804 805 psiaipriv = 0.0; 806 t2a = (double **) ga[procid]; 807 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 808 psiaipriv = psiaipriv + 0.25 * (t2a[0][0]); 809 } 810 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 811 psiaipriv = psiaipriv + 0.25 * (t2a[0][jm - 1]); 812 } 813 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 814 psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][0]); 815 } 816 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 817 psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][jm - 1]); 818 } 819 if (gp[procid].neighbors[UP] == -1) { 820 t1a = (double *) t2a[0]; 821 for (j = firstcol; j <= lastcol; j++) { 822 psiaipriv = psiaipriv + 0.5 * t1a[j]; 823 } 824 } 825 if (gp[procid].neighbors[DOWN] == -1) { 826 t1a = (double *) t2a[im - 1]; 827 for (j = firstcol; j <= lastcol; j++) { 828 psiaipriv = psiaipriv + 0.5 * t1a[j]; 829 } 830 } 831 if (gp[procid].neighbors[LEFT] == -1) { 832 for (j = firstrow; j <= lastrow; j++) { 833 psiaipriv = psiaipriv + 0.5 * t2a[j][0]; 834 } 835 } 836 if (gp[procid].neighbors[RIGHT] == -1) { 837 for (j = firstrow; j <= lastrow; j++) { 838 psiaipriv = psiaipriv + 0.5 * t2a[j][jm - 1]; 839 } 840 } 841 for (i = firstrow; i <= lastrow; i++) { 842 t1a = (double *) t2a[i]; 843 for (iindex = firstcol; iindex <= lastcol; iindex++) { 844 psiaipriv = psiaipriv + t1a[iindex]; 845 } 846 } 847 848 /* after computing its private sum, every process adds that to the 849 shared running sum psiai */ 850 851 LOCK(locks->psiailock) 852 global->psiai = global->psiai + psiaipriv; 853 UNLOCK(locks->psiailock) 854 855 END_PHASE(procid, 7); 856 857 #if defined(MULTIPLE_BARRIERS) 858 BARRIER(bars->sl_phase_7, nprocs) 859 #else 860 BARRIER(bars->barrier, nprocs) 861 #endif 862 863 /* ******************************************************* 864 865 e i g h t h p h a s e 866 867 ******************************************************* 868 869 augment ga(i,j) with [-psiai/psibi]*psib(i,j) */ 870 871 START_PHASE(procid, 8); 872 873 f4 = (-global->psiai) /(global->psibi); 874 875 t2a = (double **) ga[procid]; 876 t2b = (double **) psib[procid]; 877 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 878 t2a[0][0] = t2a[0][0] + f4 * t2b[0][0]; 879 } 880 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 881 t2a[im - 1][0] = t2a[im - 1][0] + f4 * t2b[im - 1][0]; 882 } 883 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 884 t2a[0][jm - 1] = t2a[0][jm - 1] + f4 * t2b[0][jm - 1]; 885 } 886 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 887 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f4 * t2b[im - 1][jm - 1]; 888 } 889 if (gp[procid].neighbors[UP] == -1) { 890 t1a = (double *) t2a[0]; 891 t1b = (double *) t2b[0]; 892 for (j = firstcol; j <= lastcol; j++) { 893 t1a[j] = t1a[j] + f4 * t1b[j]; 894 } 895 } 896 if (gp[procid].neighbors[DOWN] == -1) { 897 t1a = (double *) t2a[im - 1]; 898 t1b = (double *) t2b[im - 1]; 899 for (j = firstcol; j <= lastcol; j++) { 900 t1a[j] = t1a[j] + f4 * t1b[j]; 901 } 902 } 903 if (gp[procid].neighbors[LEFT] == -1) { 904 for (j = firstrow; j <= lastrow; j++) { 905 t2a[j][0] = t2a[j][0] + f4 * t2b[j][0]; 906 } 907 } 908 if (gp[procid].neighbors[RIGHT] == -1) { 909 for (j = firstrow; j <= lastrow; j++) { 910 t2a[j][jm - 1] = t2a[j][jm - 1] + f4 * t2b[j][jm - 1]; 911 } 912 } 913 for (i = firstrow; i <= lastrow; i++) { 914 t1a = (double *) t2a[i]; 915 t1b = (double *) t2b[i]; 916 for (iindex = firstcol; iindex <= lastcol; iindex++) { 917 t1a[iindex] = t1a[iindex] + f4 * t1b[iindex]; 918 } 919 } 920 921 t2a = (double **) rhs_multi[procid][numlev - 1]; 922 t2b = (double **) gb[procid]; 923 t2c = (double **) oldgb[procid]; 924 t2d = (double **) q_multi[procid][numlev - 1]; 925 for (i = istart; i <= iend; i++) { 926 t1a = (double *) t2a[i]; 927 t1b = (double *) t2b[i]; 928 for (j = jstart; j <= jend; j++) { 929 t1a[j] = t1b[j] * ressqr; 930 } 931 } 932 if (gp[procid].neighbors[UP] == -1) { 933 t1d = (double *) t2d[0]; 934 t1b = (double *) t2b[0]; 935 for (j = jstart; j <= jend; j++) { 936 t1d[j] = t1b[j]; 937 } 938 } 939 if (gp[procid].neighbors[DOWN] == -1) { 940 t1d = (double *) t2d[im - 1]; 941 t1b = (double *) t2b[im - 1]; 942 for (j = jstart; j <= jend; j++) { 943 t1d[j] = t1b[j]; 944 } 945 } 946 if (gp[procid].neighbors[LEFT] == -1) { 947 for (i = istart; i <= iend; i++) { 948 t2d[i][0] = t2b[i][0]; 949 } 950 } 951 if (gp[procid].neighbors[RIGHT] == -1) { 952 for (i = istart; i <= iend; i++) { 953 t2d[i][jm - 1] = t2b[i][jm - 1]; 954 } 955 } 956 //fac = 1.0 / (4.0 - ressqr*eig2); 957 for (i = ist; i <= ien; i++) { 958 t1d = (double *) t2d[i]; 959 t1c = (double *) t2c[i]; 960 for (j = jst; j <= jen; j++) { 961 t1d[j] = t1c[j]; 962 } 963 } 964 965 if ((procid == MASTER) || (do_stats)) { 966 CLOCK(multi_start); 967 } 968 969 multig(procid); 970 971 if ((procid == MASTER) || (do_stats)) { 972 CLOCK(multi_end); 973 (*gp[procid].multi_time) += (multi_end - multi_start); 974 } 975 976 for (i = istart; i <= iend; i++) { 977 t1b = (double *) t2b[i]; 978 t1c = (double *) t2c[i]; 979 t1d = (double *) t2d[i]; 980 for (j = jstart; j <= jend; j++) { 981 t1b[j] = t1d[j]; 982 t1c[j] = t1d[j]; 983 } 984 } 985 986 END_PHASE(procid, 8); 987 988 #if defined(MULTIPLE_BARRIERS) 989 BARRIER(bars->sl_phase_8, nprocs) 990 #else 991 BARRIER(bars->barrier, nprocs) 992 #endif 993 994 /* ******************************************************* 995 996 n i n t h p h a s e 997 998 ******************************************************* 999 1000 put appropriate linear combinations of ga and gb in work2 and work3; 1001 note that here (as in most cases) the constant multipliers are made 1002 private variables; the specific order in which things are done is 1003 chosen in order to hopefully reuse things brought into the cache 1004 1005 note that here again we choose to have all processes share the work 1006 on both matrices despite the fact that the work done per element 1007 is the same, because the operand matrices are the same in both cases */ 1008 1009 START_PHASE(procid, 9); 1010 1011 t2a = (double **) ga[procid]; 1012 t2b = (double **) gb[procid]; 1013 t2c = (double **) work2[procid]; 1014 t2d = (double **) work3[procid]; 1015 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1016 t2c[0][0] = t2b[0][0] - hh1 * t2a[0][0]; 1017 t2d[0][0] = t2b[0][0] + hh3 * t2a[0][0]; 1018 } 1019 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1020 t2c[im - 1][0] = t2b[im - 1][0] - hh1 * t2a[im - 1][0]; 1021 t2d[im - 1][0] = t2b[im - 1][0] + hh3 * t2a[im - 1][0]; 1022 } 1023 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1024 t2c[0][jm - 1] = t2b[0][jm - 1] - hh1 * t2a[0][jm - 1]; 1025 t2d[0][jm - 1] = t2b[0][jm - 1] + hh3 * t2a[0][jm - 1]; 1026 } 1027 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1028 t2c[im - 1][jm - 1] = t2b[im - 1][jm - 1] - hh1 * t2a[im - 1][jm - 1]; 1029 t2d[im - 1][jm - 1] = t2b[im - 1][jm - 1] + hh3 * t2a[im - 1][jm - 1]; 1030 } 1031 if (gp[procid].neighbors[UP] == -1) { 1032 t1a = (double *) t2a[0]; 1033 t1b = (double *) t2b[0]; 1034 t1c = (double *) t2c[0]; 1035 t1d = (double *) t2d[0]; 1036 for (j = firstcol; j <= lastcol; j++) { 1037 t1d[j] = t1b[j] + hh3 * t1a[j]; 1038 t1c[j] = t1b[j] - hh1 * t1a[j]; 1039 } 1040 } 1041 if (gp[procid].neighbors[DOWN] == -1) { 1042 t1a = (double *) t2a[im - 1]; 1043 t1b = (double *) t2b[im - 1]; 1044 t1c = (double *) t2c[im - 1]; 1045 t1d = (double *) t2d[im - 1]; 1046 for (j = firstcol; j <= lastcol; j++) { 1047 t1d[j] = t1b[j] + hh3 * t1a[j]; 1048 t1c[j] = t1b[j] - hh1 * t1a[j]; 1049 } 1050 } 1051 if (gp[procid].neighbors[LEFT] == -1) { 1052 for (j = firstrow; j <= lastrow; j++) { 1053 t2d[j][0] = t2b[j][0] + hh3 * t2a[j][0]; 1054 t2c[j][0] = t2b[j][0] - hh1 * t2a[j][0]; 1055 } 1056 } 1057 if (gp[procid].neighbors[RIGHT] == -1) { 1058 for (j = firstrow; j <= lastrow; j++) { 1059 t2d[j][jm - 1] = t2b[j][jm - 1] + hh3 * t2a[j][jm - 1]; 1060 t2c[j][jm - 1] = t2b[j][jm - 1] - hh1 * t2a[j][jm - 1]; 1061 } 1062 } 1063 1064 for (i = firstrow; i <= lastrow; i++) { 1065 t1a = (double *) t2a[i]; 1066 t1b = (double *) t2b[i]; 1067 t1c = (double *) t2c[i]; 1068 t1d = (double *) t2d[i]; 1069 for (iindex = firstcol; iindex <= lastcol; iindex++) { 1070 t1d[iindex] = t1b[iindex] + hh3 * t1a[iindex]; 1071 t1c[iindex] = t1b[iindex] - hh1 * t1a[iindex]; 1072 } 1073 } 1074 1075 END_PHASE(procid, 9); 1076 1077 #if defined(MULTIPLE_BARRIERS) 1078 BARRIER(bars->sl_phase_9, nprocs) 1079 #else 1080 BARRIER(bars->barrier, nprocs) 1081 #endif 1082 1083 /* ******************************************************* 1084 1085 t e n t h p h a s e 1086 1087 *******************************************************/ 1088 1089 START_PHASE(procid, 10); 1090 timst = 2 * dtau; 1091 1092 /* update the psi{1,3} matrices by adding 2*dtau*work3 to each */ 1093 1094 t2a = (double **) psi[procid][0]; 1095 t2b = (double **) work3[procid]; 1096 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1097 t2a[0][0] = t2a[0][0] + timst * t2b[0][0]; 1098 } 1099 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1100 t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0]; 1101 } 1102 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1103 t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1]; 1104 } 1105 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1106 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1]; 1107 } 1108 if (gp[procid].neighbors[UP] == -1) { 1109 t1a = (double *) t2a[0]; 1110 t1b = (double *) t2b[0]; 1111 for (j = firstcol; j <= lastcol; j++) { 1112 t1a[j] = t1a[j] + timst * t1b[j]; 1113 } 1114 } 1115 if (gp[procid].neighbors[DOWN] == -1) { 1116 t1a = (double *) t2a[im - 1]; 1117 t1b = (double *) t2b[im - 1]; 1118 for (j = firstcol; j <= lastcol; j++) { 1119 t1a[j] = t1a[j] + timst * t1b[j]; 1120 } 1121 } 1122 if (gp[procid].neighbors[LEFT] == -1) { 1123 for (j = firstrow; j <= lastrow; j++) { 1124 t2a[j][0] = t2a[j][0] + timst * t2b[j][0]; 1125 } 1126 } 1127 if (gp[procid].neighbors[RIGHT] == -1) { 1128 for (j = firstrow; j <= lastrow; j++) { 1129 t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1]; 1130 } 1131 } 1132 for (i = firstrow; i <= lastrow; i++) { 1133 t1a = (double *) t2a[i]; 1134 t1b = (double *) t2b[i]; 1135 for (iindex = firstcol; iindex <= lastcol; iindex++) { 1136 t1a[iindex] = t1a[iindex] + timst * t1b[iindex]; 1137 } 1138 } 1139 1140 t2a = (double **) psi[procid][1]; 1141 t2b = (double **) work2[procid]; 1142 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1143 t2a[0][0] = t2a[0][0] + timst * t2b[0][0]; 1144 } 1145 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 1146 t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0]; 1147 } 1148 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1149 t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1]; 1150 } 1151 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 1152 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1]; 1153 } 1154 if (gp[procid].neighbors[UP] == -1) { 1155 t1a = (double *) t2a[0]; 1156 t1b = (double *) t2b[0]; 1157 for (j = firstcol; j <= lastcol; j++) { 1158 t1a[j] = t1a[j] + timst * t1b[j]; 1159 } 1160 } 1161 if (gp[procid].neighbors[DOWN] == -1) { 1162 t1a = (double *) t2a[im - 1]; 1163 t1b = (double *) t2b[im - 1]; 1164 for (j = firstcol; j <= lastcol; j++) { 1165 t1a[j] = t1a[j] + timst * t1b[j]; 1166 } 1167 } 1168 if (gp[procid].neighbors[LEFT] == -1) { 1169 for (j = firstrow; j <= lastrow; j++) { 1170 t2a[j][0] = t2a[j][0] + timst * t2b[j][0]; 1171 } 1172 } 1173 if (gp[procid].neighbors[RIGHT] == -1) { 1174 for (j = firstrow; j <= lastrow; j++) { 1175 t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1]; 1176 } 1177 } 1178 1179 for (i = firstrow; i <= lastrow; i++) { 1180 t1a = (double *) t2a[i]; 1181 t1b = (double *) t2b[i]; 1182 for (iindex = firstcol; iindex <= lastcol; iindex++) { 1183 t1a[iindex] = t1a[iindex] + timst * t1b[iindex]; 1184 } 1185 } 1186 1187 END_PHASE(procid, 10); 1188 1189 #if defined(MULTIPLE_BARRIERS) 1190 BARRIER(bars->sl_phase_10, nprocs) 1191 #else 1192 BARRIER(bars->barrier, nprocs) 1193 #endif 1194 }
Note: See TracChangeset
for help on using the changeset viewer.