Changeset 598 for soft/giet_vm/applications/ocean/slave1.C
- Timestamp:
- Jul 9, 2015, 2:11:17 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/ocean/slave1.C
r589 r598 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 2 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 16 17 /* **************** 18 subroutine slave 19 **************** */ 20 21 EXTERN_ENV 22 23 #include <stdio.h> 24 #include <math.h> 25 #include <stdlib.h> 26 27 #include "decs.h" 28 29 void slave(long *ptr_procid) 30 { 31 long i; 32 long j; 33 long nstep; 34 long iindex; 35 long iday; 36 double ysca1; 37 double y; 38 double factor; 39 double sintemp; 40 double curlt; 41 double ressqr; 42 long istart; 43 long iend; 44 long jstart; 45 long jend; 46 long ist; 47 long ien; 48 long jst; 49 long jen; 50 double fac; 51 long dayflag = 0; 52 long dhourflag = 0; 53 long endflag = 0; 54 long firstrow; 55 long lastrow; 56 long numrows; 57 long firstcol; 58 long lastcol; 59 long numcols; 60 long psiindex; 61 double psibipriv; 62 double ttime; 63 double dhour; 64 double day; 65 long procid; 66 long j_off = 0; 67 unsigned long t1; 68 double **t2a; 69 double **t2b; 70 double *t1a; 71 double *t1b; 72 double *t1c; 73 double *t1d; 74 75 76 /* 77 LOCK(locks->idlock) 78 procid = global->id; 79 global->id = global->id+1; 80 UNLOCK(locks->idlock) 81 */ 82 83 procid = *ptr_procid; 84 ressqr = lev_res[numlev - 1] * lev_res[numlev - 1]; 85 86 #if defined(MULTIPLE_BARRIERS) 87 BARRIER(bars->sl_prini, nprocs) 88 #else 89 BARRIER(bars->barrier, nprocs) 90 #endif 91 /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to 92 processors to avoid migration. */ 93 /* POSSIBLE ENHANCEMENT: Here is where one might distribute 94 data structures across physically distributed memories as 95 desired. 96 97 One way to do this is as follows. The function allocate(START,SIZE,I) 98 is assumed to place all addresses x such that 99 (START <= x < START+SIZE) on node I. 100 101 long d_size; 102 unsigned long g_size; 103 unsigned long mg_size; 104 105 if (procid == MASTER) { 106 g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz 107 eof(double) + 108 ((imx[numlev-1]-2)/yprocs+2)*sizeof(double *); 109 110 mg_size = numlev*sizeof(double **); 111 for (i=0;i<numlev;i++) { 112 mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+ 113 ((imx[i]-2)/yprocs+2)*sizeof(double *); 114 } 115 for (i= 0;i<nprocs;i++) { 116 d_size = 2*sizeof(double **); 117 allocate((unsigned long) psi[i],d_size,i); 118 allocate((unsigned long) psim[i],d_size,i); 119 allocate((unsigned long) work1[i],d_size,i); 120 allocate((unsigned long) work4[i],d_size,i); 121 allocate((unsigned long) work5[i],d_size,i); 122 allocate((unsigned long) work7[i],d_size,i); 123 allocate((unsigned long) temparray[i],d_size,i); 124 allocate((unsigned long) psi[i][0],g_size,i); 125 allocate((unsigned long) psi[i][1],g_size,i); 126 allocate((unsigned long) psim[i][0],g_size,i); 127 allocate((unsigned long) psim[i][1],g_size,i); 128 allocate((unsigned long) psium[i],g_size,i); 129 allocate((unsigned long) psilm[i],g_size,i); 130 allocate((unsigned long) psib[i],g_size,i); 131 allocate((unsigned long) ga[i],g_size,i); 132 allocate((unsigned long) gb[i],g_size,i); 133 allocate((unsigned long) work1[i][0],g_size,i); 134 allocate((unsigned long) work1[i][1],g_size,i); 135 allocate((unsigned long) work2[i],g_size,i); 136 allocate((unsigned long) work3[i],g_size,i); 137 allocate((unsigned long) work4[i][0],g_size,i); 138 allocate((unsigned long) work4[i][1],g_size,i); 139 allocate((unsigned long) work5[i][0],g_size,i); 140 allocate((unsigned long) work5[i][1],g_size,i); 141 allocate((unsigned long) work6[i],g_size,i); 142 allocate((unsigned long) work7[i][0],g_size,i); 143 allocate((unsigned long) work7[i][1],g_size,i); 144 allocate((unsigned long) temparray[i][0],g_size,i); 145 allocate((unsigned long) temparray[i][1],g_size,i); 146 allocate((unsigned long) tauz[i],g_size,i); 147 allocate((unsigned long) oldga[i],g_size,i); 148 allocate((unsigned long) oldgb[i],g_size,i); 149 d_size = numlev * sizeof(long); 150 allocate((unsigned long) gp[i].rel_num_x,d_size,i); 151 allocate((unsigned long) gp[i].rel_num_y,d_size,i); 152 allocate((unsigned long) gp[i].eist,d_size,i); 153 allocate((unsigned long) gp[i].ejst,d_size,i); 154 allocate((unsigned long) gp[i].oist,d_size,i); 155 allocate((unsigned long) gp[i].ojst,d_size,i); 156 allocate((unsigned long) gp[i].rlist,d_size,i); 157 allocate((unsigned long) gp[i].rljst,d_size,i); 158 allocate((unsigned long) gp[i].rlien,d_size,i); 159 allocate((unsigned long) gp[i].rljen,d_size,i); 160 161 allocate((unsigned long) q_multi[i],mg_size,i); 162 allocate((unsigned long) rhs_multi[i],mg_size,i); 163 allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i); 164 } 165 } 166 167 */ 168 t2a = (double **) oldga[procid]; 169 t2b = (double **) oldgb[procid]; 170 for (i = 0; i < im; i++) { 171 t1a = (double *) t2a[i]; 172 t1b = (double *) t2b[i]; 173 for (j = 0; j < jm; j++) { 174 t1a[j] = 0.0; 175 t1b[j] = 0.0; 176 } 177 } 178 179 firstcol = 1; 180 lastcol = firstcol + gp[procid].rel_num_x[numlev - 1] - 1; 181 firstrow = 1; 182 lastrow = firstrow + gp[procid].rel_num_y[numlev - 1] - 1; 183 numcols = gp[procid].rel_num_x[numlev - 1]; 184 numrows = gp[procid].rel_num_y[numlev - 1]; 185 j_off = (*gp[procid].colnum) * numcols; 186 187 /* 188 if (procid > nprocs/2) { 189 psinum = 2; 190 } else { 191 psinum = 1; 192 } 193 */ 194 195 /* every process gets its own copy of the timing variables to avoid 196 contention at shared memory locations. here, these variables 197 are initialized. */ 198 199 ttime = 0.0; 200 dhour = 0.0; 201 nstep = 0; 202 day = 0.0; 203 204 ysca1 = 0.5 * ysca; 205 206 if (*gp[procid].lpid == MASTER) { 207 208 f = (double *) G_MALLOC(oim * sizeof(double), procid); 209 210 t1a = (double *) f; 211 for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) { 212 y = ((double) iindex) * res; 213 t1a[iindex] = f0 + beta * (y - ysca1); 214 } 215 } 216 217 t2a = (double **) psium[procid]; 218 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 219 t2a[0][0] = 0.0; 220 } 221 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 222 t2a[im - 1][0] = 0.0; 223 } 224 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 225 t2a[0][jm - 1] = 0.0; 226 } 227 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 228 t2a[im - 1][jm - 1] = 0.0; 229 } 230 if (gp[procid].neighbors[UP] == -1) { 231 t1a = (double *) t2a[0]; 232 for (j = firstcol; j <= lastcol; j++) { 233 t1a[j] = 0.0; 234 } 235 } 236 if (gp[procid].neighbors[DOWN] == -1) { 237 t1a = (double *) t2a[im - 1]; 238 for (j = firstcol; j <= lastcol; j++) { 239 t1a[j] = 0.0; 240 } 241 } 242 if (gp[procid].neighbors[LEFT] == -1) { 243 for (j = firstrow; j <= lastrow; j++) { 244 t2a[j][0] = 0.0; 245 } 246 } 247 if (gp[procid].neighbors[RIGHT] == -1) { 248 for (j = firstrow; j <= lastrow; j++) { 249 t2a[j][jm - 1] = 0.0; 250 } 251 } 252 253 for (i = firstrow; i <= lastrow; i++) { 254 t1a = (double *) t2a[i]; 255 for (iindex = firstcol; iindex <= lastcol; iindex++) { 256 t1a[iindex] = 0.0; 257 } 258 } 259 t2a = (double **) psilm[procid]; 260 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 261 t2a[0][0] = 0.0; 262 } 263 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 264 t2a[im - 1][0] = 0.0; 265 } 266 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 267 t2a[0][jm - 1] = 0.0; 268 } 269 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 270 t2a[im - 1][jm - 1] = 0.0; 271 } 272 if (gp[procid].neighbors[UP] == -1) { 273 t1a = (double *) t2a[0]; 274 for (j = firstcol; j <= lastcol; j++) { 275 t1a[j] = 0.0; 276 } 277 } 278 if (gp[procid].neighbors[DOWN] == -1) { 279 t1a = (double *) t2a[im - 1]; 280 for (j = firstcol; j <= lastcol; j++) { 281 t1a[j] = 0.0; 282 } 283 } 284 if (gp[procid].neighbors[LEFT] == -1) { 285 for (j = firstrow; j <= lastrow; j++) { 286 t2a[j][0] = 0.0; 287 } 288 } 289 if (gp[procid].neighbors[RIGHT] == -1) { 290 for (j = firstrow; j <= lastrow; j++) { 291 t2a[j][jm - 1] = 0.0; 292 } 293 } 294 for (i = firstrow; i <= lastrow; i++) { 295 t1a = (double *) t2a[i]; 296 for (iindex = firstcol; iindex <= lastcol; iindex++) { 297 t1a[iindex] = 0.0; 298 } 299 } 300 301 t2a = (double **) psib[procid]; 302 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 303 t2a[0][0] = 1.0; 304 } 305 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 306 t2a[0][jm - 1] = 1.0; 307 } 308 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 309 t2a[im - 1][0] = 1.0; 310 } 311 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 312 t2a[im - 1][jm - 1] = 1.0; 313 } 314 if (gp[procid].neighbors[UP] == -1) { 315 t1a = (double *) t2a[0]; 316 for (j = firstcol; j <= lastcol; j++) { 317 t1a[j] = 1.0; 318 } 319 } 320 if (gp[procid].neighbors[DOWN] == -1) { 321 t1a = (double *) t2a[im - 1]; 322 for (j = firstcol; j <= lastcol; j++) { 323 t1a[j] = 1.0; 324 } 325 } 326 if (gp[procid].neighbors[LEFT] == -1) { 327 for (j = firstrow; j <= lastrow; j++) { 328 t2a[j][0] = 1.0; 329 } 330 } 331 if (gp[procid].neighbors[RIGHT] == -1) { 332 for (j = firstrow; j <= lastrow; j++) { 333 t2a[j][jm - 1] = 1.0; 334 } 335 } 336 for (i = firstrow; i <= lastrow; i++) { 337 t1a = (double *) t2a[i]; 338 for (iindex = firstcol; iindex <= lastcol; iindex++) { 339 t1a[iindex] = 0.0; 340 } 341 } 342 343 /* wait until all processes have completed the above initialization */ 344 #if defined(MULTIPLE_BARRIERS) 345 BARRIER(bars->sl_prini, nprocs) 346 #else 347 BARRIER(bars->barrier, nprocs) 348 #endif 349 /* compute psib array (one-time computation) and integrate into psibi */ 350 istart = 1; 351 iend = istart + gp[procid].rel_num_y[numlev - 1] - 1; 352 jstart = 1; 353 jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1; 354 ist = istart; 355 ien = iend; 356 jst = jstart; 357 jen = jend; 358 359 if (gp[procid].neighbors[UP] == -1) { 360 istart = 0; 361 } 362 if (gp[procid].neighbors[LEFT] == -1) { 363 jstart = 0; 364 } 365 if (gp[procid].neighbors[DOWN] == -1) { 366 iend = im - 1; 367 } 368 if (gp[procid].neighbors[RIGHT] == -1) { 369 jend = jm - 1; 370 } 371 372 t2a = (double **) rhs_multi[procid][numlev - 1]; 373 t2b = (double **) psib[procid]; 374 for (i = istart; i <= iend; i++) { 375 t1a = (double *) t2a[i]; 376 t1b = (double *) t2b[i]; 377 for (j = jstart; j <= jend; j++) { 378 t1a[j] = t1b[j] * ressqr; 379 } 380 } 381 t2a = (double **) q_multi[procid][numlev - 1]; 382 if (gp[procid].neighbors[UP] == -1) { 383 t1a = (double *) t2a[0]; 384 t1b = (double *) t2b[0]; 385 for (j = jstart; j <= jend; j++) { 386 t1a[j] = t1b[j]; 387 } 388 } 389 if (gp[procid].neighbors[DOWN] == -1) { 390 t1a = (double *) t2a[im - 1]; 391 t1b = (double *) t2b[im - 1]; 392 for (j = jstart; j <= jend; j++) { 393 t1a[j] = t1b[j]; 394 } 395 } 396 if (gp[procid].neighbors[LEFT] == -1) { 397 for (i = istart; i <= iend; i++) { 398 t2a[i][0] = t2b[i][0]; 399 } 400 } 401 if (gp[procid].neighbors[RIGHT] == -1) { 402 for (i = istart; i <= iend; i++) { 403 t2a[i][jm - 1] = t2b[i][jm - 1]; 404 } 405 } 406 407 #if defined(MULTIPLE_BARRIERS) 408 BARRIER(bars->sl_psini, nprocs) 409 #else 410 BARRIER(bars->barrier, nprocs) 411 #endif 412 413 t2a = (double **) psib[procid]; 414 j = gp[procid].neighbors[UP]; 415 if (j != -1) { 416 t1a = (double *) t2a[0]; 417 t1b = (double *) psib[j][im - 2]; 418 for (i = 1; i < jm - 1; i++) { 419 t1a[i] = t1b[i]; 420 } 421 } 422 j = gp[procid].neighbors[DOWN]; 423 if (j != -1) { 424 t1a = (double *) t2a[im - 1]; 425 t1b = (double *) psib[j][1]; 426 for (i = 1; i < jm - 1; i++) { 427 t1a[i] = t1b[i]; 428 } 429 } 430 j = gp[procid].neighbors[LEFT]; 431 if (j != -1) { 432 t2b = (double **) psib[j]; 433 for (i = 1; i < im - 1; i++) { 434 t2a[i][0] = t2b[i][jm - 2]; 435 } 436 } 437 j = gp[procid].neighbors[RIGHT]; 438 if (j != -1) { 439 t2b = (double **) psib[j]; 440 for (i = 1; i < im - 1; i++) { 441 t2a[i][jm - 1] = t2b[i][1]; 442 } 443 } 444 445 t2a = (double **) q_multi[procid][numlev - 1]; 446 t2b = (double **) psib[procid]; 447 fac = 1.0 / (4.0 - ressqr * eig2); 448 for (i = ist; i <= ien; i++) { 449 t1a = (double *) t2a[i]; 450 t1b = (double *) t2b[i]; 451 t1c = (double *) t2b[i - 1]; 452 t1d = (double *) t2b[i + 1]; 453 for (j = jst; j <= jen; j++) { 454 t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]); 455 } 456 } 457 458 multig(procid); 459 460 for (i = istart; i <= iend; i++) { 461 t1a = (double *) t2a[i]; 462 t1b = (double *) t2b[i]; 463 for (j = jstart; j <= jend; j++) { 464 t1b[j] = t1a[j]; 465 } 466 } 467 468 #if defined(MULTIPLE_BARRIERS) 469 BARRIER(bars->sl_prini, nprocs) 470 #else 471 BARRIER(bars->barrier, nprocs) 472 #endif 473 474 /* update the local running sum psibipriv by summing all the resulting 475 values in that process's share of the psib matrix */ 476 477 t2a = (double **) psib[procid]; 478 psibipriv = 0.0; 479 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 480 psibipriv = psibipriv + 0.25 * (t2a[0][0]); 481 } 482 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 483 psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]); 484 } 485 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 486 psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]); 487 } 488 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 489 psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]); 490 } 491 if (gp[procid].neighbors[UP] == -1) { 492 t1a = (double *) t2a[0]; 493 for (j = firstcol; j <= lastcol; j++) { 494 psibipriv = psibipriv + 0.5 * t1a[j]; 495 } 496 } 497 if (gp[procid].neighbors[DOWN] == -1) { 498 t1a = (double *) t2a[im - 1]; 499 for (j = firstcol; j <= lastcol; j++) { 500 psibipriv = psibipriv + 0.5 * t1a[j]; 501 } 502 } 503 if (gp[procid].neighbors[LEFT] == -1) { 504 for (j = firstrow; j <= lastrow; j++) { 505 psibipriv = psibipriv + 0.5 * t2a[j][0]; 506 } 507 } 508 if (gp[procid].neighbors[RIGHT] == -1) { 509 for (j = firstrow; j <= lastrow; j++) { 510 psibipriv = psibipriv + 0.5 * t2a[j][jm - 1]; 511 } 512 } 513 for (i = firstrow; i <= lastrow; i++) { 514 t1a = (double *) t2a[i]; 515 for (iindex = firstcol; iindex <= lastcol; iindex++) { 516 psibipriv = psibipriv + t1a[iindex]; 517 } 518 } 519 520 /* update the shared variable psibi by summing all the psibiprivs 521 of the individual processes into it. note that this combined 522 private and shared sum method avoids accessing the shared 523 variable psibi once for every element of the matrix. */ 524 525 LOCK(locks->psibilock); 526 global->psibi = global->psibi + psibipriv; 527 UNLOCK(locks->psibilock); 528 529 /* initialize psim matrices 530 531 if there is more than one process, then split the processes 532 between the two psim matrices; otherwise, let the single process 533 work on one first and then the other */ 534 535 for (psiindex = 0; psiindex <= 1; psiindex++) { 536 t2a = (double **) psim[procid][psiindex]; 537 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 538 t2a[0][0] = 0.0; 539 } 540 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 541 t2a[im - 1][0] = 0.0; 542 } 543 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 544 t2a[0][jm - 1] = 0.0; 545 } 546 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 547 t2a[im - 1][jm - 1] = 0.0; 548 } 549 if (gp[procid].neighbors[UP] == -1) { 550 t1a = (double *) t2a[0]; 551 for (j = firstcol; j <= lastcol; j++) { 552 t1a[j] = 0.0; 553 } 554 } 555 if (gp[procid].neighbors[DOWN] == -1) { 556 t1a = (double *) t2a[im - 1]; 557 for (j = firstcol; j <= lastcol; j++) { 558 t1a[j] = 0.0; 559 } 560 } 561 if (gp[procid].neighbors[LEFT] == -1) { 562 for (j = firstrow; j <= lastrow; j++) { 563 t2a[j][0] = 0.0; 564 } 565 } 566 if (gp[procid].neighbors[RIGHT] == -1) { 567 for (j = firstrow; j <= lastrow; j++) { 568 t2a[j][jm - 1] = 0.0; 569 } 570 } 571 for (i = firstrow; i <= lastrow; i++) { 572 t1a = (double *) t2a[i]; 573 for (iindex = firstcol; iindex <= lastcol; iindex++) { 574 t1a[iindex] = 0.0; 575 } 576 } 577 } 578 579 /* initialize psi matrices the same way */ 580 581 for (psiindex = 0; psiindex <= 1; psiindex++) { 582 t2a = (double **) psi[procid][psiindex]; 583 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 584 t2a[0][0] = 0.0; 585 } 586 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 587 t2a[0][jm - 1] = 0.0; 588 } 589 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 590 t2a[im - 1][0] = 0.0; 591 } 592 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 593 t2a[im - 1][jm - 1] = 0.0; 594 } 595 if (gp[procid].neighbors[UP] == -1) { 596 t1a = (double *) t2a[0]; 597 for (j = firstcol; j <= lastcol; j++) { 598 t1a[j] = 0.0; 599 } 600 } 601 if (gp[procid].neighbors[DOWN] == -1) { 602 t1a = (double *) t2a[im - 1]; 603 for (j = firstcol; j <= lastcol; j++) { 604 t1a[j] = 0.0; 605 } 606 } 607 if (gp[procid].neighbors[LEFT] == -1) { 608 for (j = firstrow; j <= lastrow; j++) { 609 t2a[j][0] = 0.0; 610 } 611 } 612 if (gp[procid].neighbors[RIGHT] == -1) { 613 for (j = firstrow; j <= lastrow; j++) { 614 t2a[j][jm - 1] = 0.0; 615 } 616 } 617 for (i = firstrow; i <= lastrow; i++) { 618 t1a = (double *) t2a[i]; 619 for (iindex = firstcol; iindex <= lastcol; iindex++) { 620 t1a[iindex] = 0.0; 621 } 622 } 623 } 624 625 /* compute input curl of wind stress */ 626 627 628 t2a = (double **) tauz[procid]; 629 ysca1 = .5 * ysca; 630 factor = -t0 * pi / ysca1; 631 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 632 t2a[0][0] = 0.0; 633 } 634 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 635 t2a[im - 1][0] = 0.0; 636 } 637 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 638 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; 639 sintemp = sin(sintemp); 640 t2a[0][jm - 1] = factor * sintemp; 641 } 642 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 643 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; 644 sintemp = sin(sintemp); 645 t2a[im - 1][jm - 1] = factor * sintemp; 646 } 647 if (gp[procid].neighbors[UP] == -1) { 648 t1a = (double *) t2a[0]; 649 for (j = firstcol; j <= lastcol; j++) { 650 sintemp = pi * ((double) j + j_off) * res / ysca1; 651 sintemp = sin(sintemp); 652 curlt = factor * sintemp; 653 t1a[j] = curlt; 654 } 655 } 656 if (gp[procid].neighbors[DOWN] == -1) { 657 t1a = (double *) t2a[im - 1]; 658 for (j = firstcol; j <= lastcol; j++) { 659 sintemp = pi * ((double) j + j_off) * res / ysca1; 660 sintemp = sin(sintemp); 661 curlt = factor * sintemp; 662 t1a[j] = curlt; 663 } 664 } 665 if (gp[procid].neighbors[LEFT] == -1) { 666 for (j = firstrow; j <= lastrow; j++) { 667 t2a[j][0] = 0.0; 668 } 669 } 670 if (gp[procid].neighbors[RIGHT] == -1) { 671 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; 672 sintemp = sin(sintemp); 673 curlt = factor * sintemp; 674 for (j = firstrow; j <= lastrow; j++) { 675 t2a[j][jm - 1] = curlt; 676 } 677 } 678 for (i = firstrow; i <= lastrow; i++) { 679 t1a = (double *) t2a[i]; 680 for (iindex = firstcol; iindex <= lastcol; iindex++) { 681 sintemp = pi * ((double) iindex + j_off) * res / ysca1; 682 sintemp = sin(sintemp); 683 curlt = factor * sintemp; 684 t1a[iindex] = curlt; 685 } 686 } 687 688 #if defined(MULTIPLE_BARRIERS) 689 BARRIER(bars->sl_onetime, nprocs) 690 #else 691 BARRIER(bars->barrier, nprocs) 692 #endif 693 694 /*************************************************************** 695 one-time stuff over at this point 696 ***************************************************************/ 697 while (!endflag) { 698 while ((!dayflag) || (!dhourflag)) { 699 dayflag = 0; 700 dhourflag = 0; 701 if (nstep == 1) { 702 for (i = 0; i < 10; i++) { 703 gp[procid].steps_time[i] = 0; 704 } 705 if (procid == MASTER) { 706 CLOCK(global->trackstart) 707 } 708 if ((procid == MASTER) || (do_stats)) { 709 CLOCK(t1); 710 (*gp[procid].total_time) = t1; 711 (*gp[procid].multi_time) = 0; 712 } 713 /* POSSIBLE ENHANCEMENT: Here is where one might reset the 714 statistics that one is measuring about the parallel execution */ 715 } 716 717 slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols); 718 719 /* update time and step number 720 note that these time and step variables are private i.e. every 721 process has its own copy and keeps track of its own time */ 722 723 ttime = ttime + dtau; 724 nstep = nstep + 1; 725 day = ttime / 86400.0; 726 727 if (day > ((double) outday0)) { 728 dayflag = 1; 729 iday = (long) day; 730 dhour = dhour + dtau; 731 if (dhour >= 86400.0) { 732 dhourflag = 1; 733 } 734 } 735 } 736 dhour = 0.0; 737 738 t2a = (double **) psium[procid]; 739 t2b = (double **) psim[procid][0]; 740 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 741 t2a[0][0] = t2a[0][0] + t2b[0][0]; 742 } 743 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 744 t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0]; 745 } 746 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 747 t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1]; 748 } 749 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 750 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1]; 751 } 752 if (gp[procid].neighbors[UP] == -1) { 753 t1a = (double *) t2a[0]; 754 t1b = (double *) t2b[0]; 755 for (j = firstcol; j <= lastcol; j++) { 756 t1a[j] = t1a[j] + t1b[j]; 757 } 758 } 759 if (gp[procid].neighbors[DOWN] == -1) { 760 t1a = (double *) t2a[im - 1]; 761 t1b = (double *) t2b[im - 1]; 762 for (j = firstcol; j <= lastcol; j++) { 763 t1a[j] = t1a[j] + t1b[j]; 764 } 765 } 766 if (gp[procid].neighbors[LEFT] == -1) { 767 for (j = firstrow; j <= lastrow; j++) { 768 t2a[j][0] = t2a[j][0] + t2b[j][0]; 769 } 770 } 771 if (gp[procid].neighbors[RIGHT] == -1) { 772 for (j = firstrow; j <= lastrow; j++) { 773 t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1]; 774 } 775 } 776 for (i = firstrow; i <= lastrow; i++) { 777 t1a = (double *) t2a[i]; 778 t1b = (double *) t2b[i]; 779 for (iindex = firstcol; iindex <= lastcol; iindex++) { 780 t1a[iindex] = t1a[iindex] + t1b[iindex]; 781 } 782 } 783 784 /* update values of psilm array to psilm + psim[2] */ 785 786 t2a = (double **) psilm[procid]; 787 t2b = (double **) psim[procid][1]; 788 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 789 t2a[0][0] = t2a[0][0] + t2b[0][0]; 790 } 791 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) { 792 t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0]; 793 } 794 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 795 t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1]; 796 } 797 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) { 798 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1]; 799 } 800 if (gp[procid].neighbors[UP] == -1) { 801 t1a = (double *) t2a[0]; 802 t1b = (double *) t2b[0]; 803 for (j = firstcol; j <= lastcol; j++) { 804 t1a[j] = t1a[j] + t1b[j]; 805 } 806 } 807 if (gp[procid].neighbors[DOWN] == -1) { 808 t1a = (double *) t2a[im - 1]; 809 t1b = (double *) t2b[im - 1]; 810 for (j = firstcol; j <= lastcol; j++) { 811 t1a[j] = t1a[j] + t1b[j]; 812 } 813 } 814 if (gp[procid].neighbors[LEFT] == -1) { 815 for (j = firstrow; j <= lastrow; j++) { 816 t2a[j][0] = t2a[j][0] + t2b[j][0]; 817 } 818 } 819 if (gp[procid].neighbors[RIGHT] == -1) { 820 for (j = firstrow; j <= lastrow; j++) { 821 t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1]; 822 } 823 } 824 for (i = firstrow; i <= lastrow; i++) { 825 t1a = (double *) t2a[i]; 826 t1b = (double *) t2b[i]; 827 for (iindex = firstcol; iindex <= lastcol; iindex++) { 828 t1a[iindex] = t1a[iindex] + t1b[iindex]; 829 } 830 } 831 if (iday >= (long) outday3) { 832 endflag = 1; 833 } 834 } 835 if ((procid == MASTER) || (do_stats)) { 836 CLOCK(t1); 837 (*gp[procid].total_time) = t1 - (*gp[procid].total_time); 838 } 839 }
Note: See TracChangeset
for help on using the changeset viewer.