Ignore:
Timestamp:
Jul 8, 2015, 3:57:15 PM (9 years ago)
Author:
alain
Message:

Modify all applications to support two new rules:
1) introduce a local Makefile for each application.
2) change "application.elf" name to "application/appli.elf" name in the application.py" file.
Introduce the shell application.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/ocean/slave2.C

    r581 r589  
    1 /*************************************************************************/
    2 /*                                                                       */
    3 /*  Copyright (c) 1994 Stanford University                               */
    4 /*                                                                       */
    5 /*  All rights reserved.                                                 */
    6 /*                                                                       */
    7 /*  Permission is given to use, copy, and modify this software for any   */
    8 /*  non-commercial purpose as long as this copyright notice is not       */
    9 /*  removed.  All other uses, including redistribution in whole or in    */
    10 /*  part, are forbidden without prior written permission.                */
    11 /*                                                                       */
    12 /*  This software is provided with absolutely no warranty and no         */
    13 /*  support.                                                             */
    14 /*                                                                       */
    15 /*************************************************************************/
     1#line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET"
    162
    17 /*    ****************
    18       subroutine slave2
    19       ****************  */
    20 
    21 EXTERN_ENV
    22 
    23 #include <stdio.h>
    24 #include <math.h>
    25 #include <stdlib.h>
    26 
    27 #include "decs.h"
    28 
    29 void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols)
    30 {
    31     long i;
    32     long j;
    33     long iindex;
    34     double hh1;
    35     double hh3;
    36     double hinv;
    37     double h1inv;
    38     long istart;
    39     long iend;
    40     long jstart;
    41     long jend;
    42     long ist;
    43     long ien;
    44     long jst;
    45     long jen;
    46     double ressqr;
    47     double psiaipriv;
    48     double f4;
    49     double timst;
    50     long psiindex;
    51     long i_off;
    52     long j_off;
    53     long multi_start;
    54     long multi_end;
    55     double **t2a;
    56     double **t2b;
    57     double **t2c;
    58     double **t2d;
    59     double **t2e;
    60     double **t2f;
    61     double **t2g;
    62     double **t2h;
    63     double *t1a;
    64     double *t1b;
    65     double *t1c;
    66     double *t1d;
    67     double *t1e;
    68     double *t1f;
    69     double *t1g;
    70     double *t1h;
    71 
    72     ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];
    73     i_off = (*gp[procid].rownum) * numrows;
    74     j_off = (*gp[procid].colnum) * numcols;
    75 
    76     START_PHASE(procid, 1);
    77 
    78 /*   ***************************************************************
    79 
    80           f i r s t     p h a s e   (of timestep calculation)
    81 
    82      ***************************************************************/
    83 
    84     t2a = (double **) ga[procid];
    85     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    86         t2a[0][0] = 0.0;
    87     }
    88     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    89         t2a[im - 1][0] = 0.0;
    90     }
    91     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    92         t2a[0][jm - 1] = 0.0;
    93     }
    94     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    95         t2a[im - 1][jm - 1] = 0.0;
    96     }
    97     if (gp[procid].neighbors[UP] == -1) {
    98         t1a = (double *) t2a[0];
    99         for (j = firstcol; j <= lastcol; j++) {
    100             t1a[j] = 0.0;
    101         }
    102     }
    103     if (gp[procid].neighbors[DOWN] == -1) {
    104         t1a = (double *) t2a[im - 1];
    105         for (j = firstcol; j <= lastcol; j++) {
    106             t1a[j] = 0.0;
    107         }
    108     }
    109     if (gp[procid].neighbors[LEFT] == -1) {
    110         for (j = firstrow; j <= lastrow; j++) {
    111             t2a[j][0] = 0.0;
    112         }
    113     }
    114     if (gp[procid].neighbors[RIGHT] == -1) {
    115         for (j = firstrow; j <= lastrow; j++) {
    116             t2a[j][jm - 1] = 0.0;
    117         }
    118     }
    119     for (i = firstrow; i <= lastrow; i++) {
    120         t1a = (double *) t2a[i];
    121         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    122             t1a[iindex] = 0.0;
    123         }
    124     }
    125 
    126     t2a = (double **) gb[procid];
    127     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    128         t2a[0][0] = 0.0;
    129     }
    130     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    131         t2a[im - 1][0] = 0.0;
    132     }
    133     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    134         t2a[0][jm - 1] = 0.0;
    135     }
    136     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    137         t2a[im - 1][jm - 1] = 0.0;
    138     }
    139     if (gp[procid].neighbors[UP] == -1) {
    140         t1a = (double *) t2a[0];
    141         for (j = firstcol; j <= lastcol; j++) {
    142             t1a[j] = 0.0;
    143         }
    144     }
    145     if (gp[procid].neighbors[DOWN] == -1) {
    146         t1a = (double *) t2a[im - 1];
    147         for (j = firstcol; j <= lastcol; j++) {
    148             t1a[j] = 0.0;
    149         }
    150     }
    151     if (gp[procid].neighbors[LEFT] == -1) {
    152         for (j = firstrow; j <= lastrow; j++) {
    153             t2a[j][0] = 0.0;
    154         }
    155     }
    156     if (gp[procid].neighbors[RIGHT] == -1) {
    157         for (j = firstrow; j <= lastrow; j++) {
    158             t2a[j][jm - 1] = 0.0;
    159         }
    160     }
    161     for (i = firstrow; i <= lastrow; i++) {
    162         t1a = (double *) t2a[i];
    163         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    164             t1a[iindex] = 0.0;
    165         }
    166     }
    167 
    168 /* put the laplacian of psi{1,3} in work1{1,2}
    169    note that psi(i,j,2) represents the psi3 array in
    170    the original equations  */
    171 
    172     for (psiindex = 0; psiindex <= 1; psiindex++) {
    173         t2a = (double **) work1[procid][psiindex];
    174         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    175             t2a[0][0] = 0;
    176         }
    177         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    178             t2a[im - 1][0] = 0;
    179         }
    180         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    181             t2a[0][jm - 1] = 0;
    182         }
    183         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    184             t2a[im - 1][jm - 1] = 0;
    185         }
    186         laplacalc(procid, psi, work1, psiindex, firstrow, lastrow, firstcol, lastcol);
    187     }
    188 
    189 /* set values of work2 array to psi1 - psi3   */
    190 
    191     t2a = (double **) work2[procid];
    192     t2b = (double **) psi[procid][0];
    193     t2c = (double **) psi[procid][1];
    194     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    195         t2a[0][0] = t2b[0][0] - t2c[0][0];
    196     }
    197     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    198         t2a[im - 1][0] = t2b[im - 1][0] - t2c[im - 1][0];
    199     }
    200     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    201         t2a[0][jm - 1] = t2b[0][jm - 1] - t2c[0][jm - 1];
    202     }
    203     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    204         t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1] - t2c[im - 1][jm - 1];
    205     }
    206     if (gp[procid].neighbors[UP] == -1) {
    207         t1a = (double *) t2a[0];
    208         t1b = (double *) t2b[0];
    209         t1c = (double *) t2c[0];
    210         for (j = firstcol; j <= lastcol; j++) {
    211             t1a[j] = t1b[j] - t1c[j];
    212         }
    213     }
    214     if (gp[procid].neighbors[DOWN] == -1) {
    215         t1a = (double *) t2a[im - 1];
    216         t1b = (double *) t2b[im - 1];
    217         t1c = (double *) t2c[im - 1];
    218         for (j = firstcol; j <= lastcol; j++) {
    219             t1a[j] = t1b[j] - t1c[j];
    220         }
    221     }
    222     if (gp[procid].neighbors[LEFT] == -1) {
    223         for (j = firstrow; j <= lastrow; j++) {
    224             t2a[j][0] = t2b[j][0] - t2c[j][0];
    225         }
    226     }
    227     if (gp[procid].neighbors[RIGHT] == -1) {
    228         for (j = firstrow; j <= lastrow; j++) {
    229             t2a[j][jm - 1] = t2b[j][jm - 1] - t2c[j][jm - 1];
    230         }
    231     }
    232     for (i = firstrow; i <= lastrow; i++) {
    233         t1a = (double *) t2a[i];
    234         t1b = (double *) t2b[i];
    235         t1c = (double *) t2c[i];
    236         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    237             t1a[iindex] = t1b[iindex] - t1c[iindex];
    238         }
    239     }
    240 
    241 /* set values of work3 array to h3/h * psi1 + h1/h * psi3  */
    242 
    243     t2a = (double **) work3[procid];
    244     hh3 = h3 / h;
    245     hh1 = h1 / h;
    246     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    247         t2a[0][0] = hh3 * t2a[0][0] + hh1 * t2c[0][0];
    248     }
    249     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    250         t2a[im - 1][0] = hh3 * t2a[im - 1][0] + hh1 * t2c[im - 1][0];
    251     }
    252     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    253         t2a[0][jm - 1] = hh3 * t2a[0][jm - 1] + hh1 * t2c[0][jm - 1];
    254     }
    255     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    256         t2a[im - 1][jm - 1] = hh3 * t2a[im - 1][jm - 1] + hh1 * t2c[im - 1][jm - 1];
    257     }
    258     if (gp[procid].neighbors[UP] == -1) {
    259         for (j = firstcol; j <= lastcol; j++) {
    260             t2a[0][j] = hh3 * t2a[0][j] + hh1 * t2c[0][j];
    261         }
    262     }
    263     if (gp[procid].neighbors[DOWN] == -1) {
    264         for (j = firstcol; j <= lastcol; j++) {
    265             t2a[im - 1][j] = hh3 * t2a[im - 1][j] + hh1 * t2c[im - 1][j];
    266         }
    267     }
    268     if (gp[procid].neighbors[LEFT] == -1) {
    269         for (j = firstrow; j <= lastrow; j++) {
    270             t2a[j][0] = hh3 * t2a[j][0] + hh1 * t2c[j][0];
    271         }
    272     }
    273     if (gp[procid].neighbors[RIGHT] == -1) {
    274         for (j = firstrow; j <= lastrow; j++) {
    275             t2a[j][jm - 1] = hh3 * t2a[j][jm - 1] + hh1 * t2c[j][jm - 1];
    276         }
    277     }
    278     for (i = firstrow; i <= lastrow; i++) {
    279         t1a = (double *) t2a[i];
    280         t1c = (double *) t2c[i];
    281         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    282             t1a[iindex] = hh3 * t1a[iindex] + hh1 * t1c[iindex];
    283         }
    284     }
    285 
    286 /* set values of temparray{1,3} to psi{1,3}  */
    287 
    288     for (psiindex = 0; psiindex <= 1; psiindex++) {
    289         t2a = (double **) temparray[procid][psiindex];
    290         t2b = (double **) psi[procid][psiindex];
    291         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    292             t2a[0][0] = t2b[0][0];
    293         }
    294         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    295             t2a[im - 1][0] = t2b[im - 1][0];
    296         }
    297         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    298             t2a[0][jm - 1] = t2b[0][jm - 1];
    299         }
    300         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    301             t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
    302         }
    303         if (gp[procid].neighbors[UP] == -1) {
    304             for (j = firstcol; j <= lastcol; j++) {
    305                 t2a[0][j] = t2b[0][j];
    306             }
    307         }
    308         if (gp[procid].neighbors[DOWN] == -1) {
    309             for (j = firstcol; j <= lastcol; j++) {
    310                 t2a[im - 1][j] = t2b[im - 1][j];
    311             }
    312         }
    313         if (gp[procid].neighbors[LEFT] == -1) {
    314             for (j = firstrow; j <= lastrow; j++) {
    315                 t2a[j][0] = t2b[j][0];
    316             }
    317         }
    318         if (gp[procid].neighbors[RIGHT] == -1) {
    319             for (j = firstrow; j <= lastrow; j++) {
    320                 t2a[j][jm - 1] = t2b[j][jm - 1];
    321             }
    322         }
    323 
    324         for (i = firstrow; i <= lastrow; i++) {
    325             t1a = (double *) t2a[i];
    326             t1b = (double *) t2b[i];
    327             for (iindex = firstcol; iindex <= lastcol; iindex++) {
    328                 t1a[iindex] = t1b[iindex];
    329             }
    330         }
    331     }
    332 
    333     END_PHASE(procid, 1);
    334 
    335 #if defined(MULTIPLE_BARRIERS)
    336     BARRIER(bars->sl_phase_1, nprocs)
    337 #else
    338     BARRIER(bars->barrier, nprocs)
    339 #endif
    340    
    341 /*     *******************************************************
    342 
    343               s e c o n d   p h a s e
    344 
    345        *******************************************************
    346 
    347    set values of psi{1,3} to psim{1,3}   */
    348 
    349     START_PHASE(procid, 2);
    350 
    351     for (psiindex = 0; psiindex <= 1; psiindex++) {
    352         t2a = (double **) psi[procid][psiindex];
    353         t2b = (double **) psim[procid][psiindex];
    354         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    355             t2a[0][0] = t2b[0][0];
    356         }
    357         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    358             t2a[0][jm - 1] = t2b[0][jm - 1];
    359         }
    360         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    361             t2a[im - 1][0] = t2b[im - 1][0];
    362         }
    363         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    364             t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
    365         }
    366         if (gp[procid].neighbors[UP] == -1) {
    367             for (j = firstcol; j <= lastcol; j++) {
    368                 t2a[0][j] = t2b[0][j];
    369             }
    370         }
    371         if (gp[procid].neighbors[DOWN] == -1) {
    372             for (j = firstcol; j <= lastcol; j++) {
    373                 t2a[im - 1][j] = t2b[im - 1][j];
    374             }
    375         }
    376         if (gp[procid].neighbors[LEFT] == -1) {
    377             for (j = firstrow; j <= lastrow; j++) {
    378                 t2a[j][0] = t2b[j][0];
    379             }
    380         }
    381         if (gp[procid].neighbors[RIGHT] == -1) {
    382             for (j = firstrow; j <= lastrow; j++) {
    383                 t2a[j][jm - 1] = t2b[j][jm - 1];
    384             }
    385         }
    386 
    387         for (i = firstrow; i <= lastrow; i++) {
    388             t1a = (double *) t2a[i];
    389             t1b = (double *) t2b[i];
    390             for (iindex = firstcol; iindex <= lastcol; iindex++) {
    391                 t1a[iindex] = t1b[iindex];
    392             }
    393         }
    394     }
    395 
    396 /* put the laplacian of the psim array
    397    into the work7 array; first part of a three-laplacian
    398    calculation to compute the friction terms  */
    399 
    400     for (psiindex = 0; psiindex <= 1; psiindex++) {
    401         t2a = (double **) work7[procid][psiindex];
    402         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    403             t2a[0][0] = 0;
    404         }
    405         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    406             t2a[im - 1][0] = 0;
    407         }
    408         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    409             t2a[0][jm - 1] = 0;
    410         }
    411         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    412             t2a[im - 1][jm - 1] = 0;
    413         }
    414         laplacalc(procid, psim, work7, psiindex, firstrow, lastrow, firstcol, lastcol);
    415     }
    416 
    417 /* to the values of the work1{1,2} arrays obtained from the
    418    laplacians of psi{1,2} in the previous phase, add to the
    419    elements of every column the corresponding value in the
    420    one-dimenional f array  */
    421 
    422     for (psiindex = 0; psiindex <= 1; psiindex++) {
    423         t2a = (double **) work1[procid][psiindex];
    424         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    425             t2a[0][0] = t2a[0][0] + f[0];
    426         }
    427         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    428             t2a[im - 1][0] = t2a[im - 1][0] + f[0];
    429         }
    430         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    431             t2a[0][jm - 1] = t2a[0][jm - 1] + f[jmx[numlev - 1] - 1];
    432         }
    433         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    434             t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f[jmx[numlev - 1] - 1];
    435         }
    436         if (gp[procid].neighbors[UP] == -1) {
    437             for (j = firstcol; j <= lastcol; j++) {
    438                 t2a[0][j] = t2a[0][j] + f[j + j_off];
    439             }
    440         }
    441         if (gp[procid].neighbors[DOWN] == -1) {
    442             for (j = firstcol; j <= lastcol; j++) {
    443                 t2a[im - 1][j] = t2a[im - 1][j] + f[j + j_off];
    444             }
    445         }
    446         if (gp[procid].neighbors[LEFT] == -1) {
    447             for (j = firstrow; j <= lastrow; j++) {
    448                 t2a[j][0] = t2a[j][0] + f[j + i_off];
    449             }
    450         }
    451         if (gp[procid].neighbors[RIGHT] == -1) {
    452             for (j = firstrow; j <= lastrow; j++) {
    453                 t2a[j][jm - 1] = t2a[j][jm - 1] + f[j + i_off];
    454             }
    455         }
    456         for (i = firstrow; i <= lastrow; i++) {
    457             t1a = (double *) t2a[i];
    458             for (iindex = firstcol; iindex <= lastcol; iindex++) {
    459                 t1a[iindex] = t1a[iindex] + f[iindex + j_off];
    460             }
    461         }
    462     }
    463 
    464     END_PHASE(procid, 2);
    465    
    466 #if defined(MULTIPLE_BARRIERS)
    467     BARRIER(bars->sl_phase_2, nprocs)
    468 #else
    469     BARRIER(bars->barrier, nprocs)
    470 #endif
    471 /*      *******************************************************
    472 
    473                  t h i r d   p h a s e
    474 
    475         *******************************************************
    476 
    477    put the jacobian of the work1{1,2} and psi{1,3} arrays
    478    (the latter currently in temparray) in the work5{1,2} arrays  */
    479 
    480     START_PHASE(procid, 3);
    481 
    482     for (psiindex = 0; psiindex <= 1; psiindex++) {
    483         jacobcalc2(work1, temparray, work5, psiindex, procid, firstrow, lastrow, firstcol, lastcol);
    484     }
    485 
    486 /* set values of psim{1,3} to temparray{1,3}  */
    487 
    488     for (psiindex = 0; psiindex <= 1; psiindex++) {
    489         t2a = (double **) psim[procid][psiindex];
    490         t2b = (double **) temparray[procid][psiindex];
    491         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    492             t2a[0][0] = t2b[0][0];
    493         }
    494         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    495             t2a[im - 1][0] = t2b[im - 1][0];
    496         }
    497         if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    498             t2a[0][jm - 1] = t2b[0][jm - 1];
    499         }
    500         if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    501             t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
    502         }
    503         if (gp[procid].neighbors[UP] == -1) {
    504             t1a = (double *) t2a[0];
    505             t1b = (double *) t2b[0];
    506             for (j = firstcol; j <= lastcol; j++) {
    507                 t1a[j] = t1b[j];
    508             }
    509         }
    510         if (gp[procid].neighbors[DOWN] == -1) {
    511             t1a = (double *) t2a[im - 1];
    512             t1b = (double *) t2b[im - 1];
    513             for (j = firstcol; j <= lastcol; j++) {
    514                 t1a[j] = t1b[j];
    515             }
    516         }
    517         if (gp[procid].neighbors[LEFT] == -1) {
    518             for (j = firstrow; j <= lastrow; j++) {
    519                 t2a[j][0] = t2b[j][0];
    520             }
    521         }
    522         if (gp[procid].neighbors[RIGHT] == -1) {
    523             for (j = firstrow; j <= lastrow; j++) {
    524                 t2a[j][jm - 1] = t2b[j][jm - 1];
    525             }
    526         }
    527         for (i = firstrow; i <= lastrow; i++) {
    528             t1a = (double *) t2a[i];
    529             t1b = (double *) t2b[i];
    530             for (iindex = firstcol; iindex <= lastcol; iindex++) {
    531                 t1a[iindex] = t1b[iindex];
    532             }
    533         }
    534     }
    535 
    536 /* put the laplacian of the work7{1,2} arrays in the work4{1,2}
    537    arrays; second step in the three-laplacian friction calculation  */
    538 
    539     for (psiindex = 0; psiindex <= 1; psiindex++) {
    540         laplacalc(procid, work7, work4, psiindex, firstrow, lastrow, firstcol, lastcol);
    541     }
    542 
    543     END_PHASE(procid, 3);
    544 
    545 #if defined(MULTIPLE_BARRIERS)
    546     BARRIER(bars->sl_phase_3, nprocs)
    547 #else
    548     BARRIER(bars->barrier, nprocs)
    549 #endif
    550 
    551 /*     *******************************************************
    552 
    553                 f o u r t h   p h a s e
    554 
    555        *******************************************************
    556 
    557    put the jacobian of the work2 and work3 arrays in the work6
    558    array  */
    559 
    560     START_PHASE(procid, 4);
    561 
    562     jacobcalc(work2, work3, work6, procid, firstrow, lastrow, firstcol, lastcol);
    563 
    564 /* put the laplacian of the work4{1,2} arrays in the work7{1,2}
    565    arrays; third step in the three-laplacian friction calculation  */
    566 
    567     for (psiindex = 0; psiindex <= 1; psiindex++) {
    568         laplacalc(procid, work4, work7, psiindex, firstrow, lastrow, firstcol, lastcol);
    569     }
    570    
    571     END_PHASE(procid, 4);
    572 
    573 #if defined(MULTIPLE_BARRIERS)
    574     BARRIER(bars->sl_phase_4, nprocs)
    575 #else
    576     BARRIER(bars->barrier, nprocs)
    577 #endif
    578 
    579 /*     *******************************************************
    580 
    581                 f i f t h   p h a s e
    582 
    583        *******************************************************
    584 
    585    use the values of the work5, work6 and work7 arrays
    586    computed in the previous time-steps to compute the
    587    ga and gb arrays   */
    588 
    589     START_PHASE(procid, 5);
    590 
    591     hinv = 1.0 / h;
    592     h1inv = 1.0 / h1;
    593 
    594     t2a = (double **) ga[procid];
    595     t2b = (double **) gb[procid];
    596     t2c = (double **) work5[procid][0];
    597     t2d = (double **) work5[procid][1];
    598     t2e = (double **) work7[procid][0];
    599     t2f = (double **) work7[procid][1];
    600     t2g = (double **) work6[procid];
    601     t2h = (double **) tauz[procid];
    602     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    603         t2a[0][0] = t2c[0][0] - t2d[0][0] + eig2 * t2g[0][0] + h1inv * t2h[0][0] + lf * t2e[0][0] - lf * t2f[0][0];
    604         t2b[0][0] = hh1 * t2c[0][0] + hh3 * t2d[0][0] + hinv * t2h[0][0] + lf * hh1 * t2e[0][0] + lf * hh3 * t2f[0][0];
    605     }
    606     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    607         t2a[im - 1][0] = t2c[im - 1][0] - t2d[im - 1][0] + eig2 * t2g[im - 1][0] + h1inv * t2h[im - 1][0] + lf * t2e[im - 1][0] - lf * t2f[im - 1][0];
    608         t2b[im - 1][0] = hh1 * t2c[im - 1][0] + hh3 * t2d[im - 1][0] + hinv * t2h[im - 1][0] + lf * hh1 * t2e[im - 1][0] + lf * hh3 * t2f[im - 1][0];
    609     }
    610     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    611         t2a[0][jm - 1] = t2c[0][jm - 1] - t2d[0][jm - 1] + eig2 * t2g[0][jm - 1] + h1inv * t2h[0][jm - 1] + lf * t2e[0][jm - 1] - lf * t2f[0][jm - 1];
    612         t2b[0][jm - 1] = hh1 * t2c[0][jm - 1] + hh3 * t2d[0][jm - 1] + hinv * t2h[0][jm - 1] + lf * hh1 * t2e[0][jm - 1] + lf * hh3 * t2f[0][jm - 1];
    613     }
    614     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    615         t2a[im - 1][jm - 1] = t2c[im - 1][jm - 1] - t2d[im - 1][jm - 1] + eig2 * t2g[im - 1][jm - 1] + h1inv * t2h[im - 1][jm - 1] + lf * t2e[im - 1][jm - 1] - lf * t2f[im - 1][jm - 1];
    616         t2b[im - 1][jm - 1] = hh1 * t2c[im - 1][jm - 1] + hh3 * t2d[im - 1][jm - 1] + hinv * t2h[im - 1][jm - 1] + lf * hh1 * t2e[im - 1][jm - 1] + lf * hh3 * t2f[im - 1][jm - 1];
    617     }
    618     if (gp[procid].neighbors[UP] == -1) {
    619         t1a = (double *) t2a[0];
    620         t1b = (double *) t2b[0];
    621         t1c = (double *) t2c[0];
    622         t1d = (double *) t2d[0];
    623         t1e = (double *) t2e[0];
    624         t1f = (double *) t2f[0];
    625         t1g = (double *) t2g[0];
    626         t1h = (double *) t2h[0];
    627         for (j = firstcol; j <= lastcol; j++) {
    628             t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];
    629             t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];
    630         }
    631     }
    632     if (gp[procid].neighbors[DOWN] == -1) {
    633         t1a = (double *) t2a[im - 1];
    634         t1b = (double *) t2b[im - 1];
    635         t1c = (double *) t2c[im - 1];
    636         t1d = (double *) t2d[im - 1];
    637         t1e = (double *) t2e[im - 1];
    638         t1f = (double *) t2f[im - 1];
    639         t1g = (double *) t2g[im - 1];
    640         t1h = (double *) t2h[im - 1];
    641         for (j = firstcol; j <= lastcol; j++) {
    642             t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];
    643             t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];
    644         }
    645     }
    646     if (gp[procid].neighbors[LEFT] == -1) {
    647         for (j = firstrow; j <= lastrow; j++) {
    648             t2a[j][0] = t2c[j][0] - t2d[j][0] + eig2 * t2g[j][0] + h1inv * t2h[j][0] + lf * t2e[j][0] - lf * t2f[j][0];
    649             t2b[j][0] = hh1 * t2c[j][0] + hh3 * t2d[j][0] + hinv * t2h[j][0] + lf * hh1 * t2e[j][0] + lf * hh3 * t2f[j][0];
    650         }
    651     }
    652     if (gp[procid].neighbors[RIGHT] == -1) {
    653         for (j = firstrow; j <= lastrow; j++) {
    654             t2a[j][jm - 1] = t2c[j][jm - 1] - t2d[j][jm - 1] + eig2 * t2g[j][jm - 1] + h1inv * t2h[j][jm - 1] + lf * t2e[j][jm - 1] - lf * t2f[j][jm - 1];
    655             t2b[j][jm - 1] = hh1 * t2c[j][jm - 1] + hh3 * t2d[j][jm - 1] + hinv * t2h[j][jm - 1] + lf * hh1 * t2e[j][jm - 1] + lf * hh3 * t2f[j][jm - 1];
    656         }
    657     }
    658 
    659     for (i = firstrow; i <= lastrow; i++) {
    660         t1a = (double *) t2a[i];
    661         t1b = (double *) t2b[i];
    662         t1c = (double *) t2c[i];
    663         t1d = (double *) t2d[i];
    664         t1e = (double *) t2e[i];
    665         t1f = (double *) t2f[i];
    666         t1g = (double *) t2g[i];
    667         t1h = (double *) t2h[i];
    668         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    669             t1a[iindex] = t1c[iindex] - t1d[iindex] + eig2 * t1g[iindex] + h1inv * t1h[iindex] + lf * t1e[iindex] - lf * t1f[iindex];
    670             t1b[iindex] = hh1 * t1c[iindex] + hh3 * t1d[iindex] + hinv * t1h[iindex] + lf * hh1 * t1e[iindex] + lf * hh3 * t1f[iindex];
    671         }
    672     }
    673 
    674     END_PHASE(procid, 5);
    675 
    676 #if defined(MULTIPLE_BARRIERS)
    677     BARRIER(bars->sl_phase_5, nprocs)
    678 #else
    679     BARRIER(bars->barrier, nprocs)
    680 #endif
    681 
    682 /*     *******************************************************
    683 
    684                s i x t h   p h a s e
    685 
    686        *******************************************************  */
    687 
    688     START_PHASE(procid, 6);
    689 
    690     istart = 1;
    691     iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;
    692     jstart = 1;
    693     jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;
    694     ist = istart;
    695     ien = iend;
    696     jst = jstart;
    697     jen = jend;
    698 
    699     if (gp[procid].neighbors[UP] == -1) {
    700         istart = 0;
    701     }
    702     if (gp[procid].neighbors[LEFT] == -1) {
    703         jstart = 0;
    704     }
    705     if (gp[procid].neighbors[DOWN] == -1) {
    706         iend = im - 1;
    707     }
    708     if (gp[procid].neighbors[RIGHT] == -1) {
    709         jend = jm - 1;
    710     }
    711     t2a = (double **) rhs_multi[procid][numlev - 1];
    712     t2b = (double **) ga[procid];
    713     t2c = (double **) oldga[procid];
    714     t2d = (double **) q_multi[procid][numlev - 1];
    715     for (i = istart; i <= iend; i++) {
    716         t1a = (double *) t2a[i];
    717         t1b = (double *) t2b[i];
    718         for (j = jstart; j <= jend; j++) {
    719             t1a[j] = t1b[j] * ressqr;
    720         }
    721     }
    722 
    723     if (gp[procid].neighbors[UP] == -1) {
    724         t1d = (double *) t2d[0];
    725         t1b = (double *) t2b[0];
    726         for (j = jstart; j <= jend; j++) {
    727             t1d[j] = t1b[j];
    728         }
    729     }
    730     if (gp[procid].neighbors[DOWN] == -1) {
    731         t1d = (double *) t2d[im - 1];
    732         t1b = (double *) t2b[im - 1];
    733         for (j = jstart; j <= jend; j++) {
    734             t1d[j] = t1b[j];
    735         }
    736     }
    737     if (gp[procid].neighbors[LEFT] == -1) {
    738         for (i = istart; i <= iend; i++) {
    739             t2d[i][0] = t2b[i][0];
    740         }
    741     }
    742     if (gp[procid].neighbors[RIGHT] == -1) {
    743         for (i = istart; i <= iend; i++) {
    744             t2d[i][jm - 1] = t2b[i][jm - 1];
    745         }
    746     }
    747     //fac = 1.0 / (4.0 - ressqr*eig2);
    748     for (i = ist; i <= ien; i++) {
    749         t1d = (double *) t2d[i];
    750         t1c = (double *) t2c[i];
    751         for (j = jst; j <= jen; j++) {
    752             t1d[j] = t1c[j];
    753         }
    754     }
    755 
    756     if ((procid == MASTER) || (do_stats)) {
    757         CLOCK(multi_start);
    758     }
    759 
    760     multig(procid);
    761 
    762     if ((procid == MASTER) || (do_stats)) {
    763         CLOCK(multi_end);
    764         (*gp[procid].multi_time) += (multi_end - multi_start);
    765     }
    766 
    767 /* the shared sum variable psiai is initialized to 0 at
    768    every time-step  */
    769 
    770     if (procid == MASTER) {
    771         global->psiai = 0.0;
    772     }
    773 
    774 /*  copy the solution for use as initial guess in next time-step  */
    775 
    776     for (i = istart; i <= iend; i++) {
    777         t1b = (double *) t2b[i];
    778         t1c = (double *) t2c[i];
    779         t1d = (double *) t2d[i];
    780         for (j = jstart; j <= jend; j++) {
    781             t1b[j] = t1d[j];
    782             t1c[j] = t1d[j];
    783         }
    784     }
    785 
    786     END_PHASE(procid, 6);
    787 
    788 #if defined(MULTIPLE_BARRIERS)
    789     BARRIER(bars->sl_phase_6, nprocs)
    790 #else
    791     BARRIER(bars->barrier, nprocs)
    792 #endif
    793 
    794 /*     *******************************************************
    795 
    796                 s e v e n t h   p h a s e
    797 
    798        *******************************************************
    799 
    800    every process computes the running sum for its assigned portion
    801    in a private variable psiaipriv   */
    802 
    803     START_PHASE(procid, 7);
    804 
    805     psiaipriv = 0.0;
    806     t2a = (double **) ga[procid];
    807     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    808         psiaipriv = psiaipriv + 0.25 * (t2a[0][0]);
    809     }
    810     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    811         psiaipriv = psiaipriv + 0.25 * (t2a[0][jm - 1]);
    812     }
    813     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    814         psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][0]);
    815     }
    816     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    817         psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][jm - 1]);
    818     }
    819     if (gp[procid].neighbors[UP] == -1) {
    820         t1a = (double *) t2a[0];
    821         for (j = firstcol; j <= lastcol; j++) {
    822             psiaipriv = psiaipriv + 0.5 * t1a[j];
    823         }
    824     }
    825     if (gp[procid].neighbors[DOWN] == -1) {
    826         t1a = (double *) t2a[im - 1];
    827         for (j = firstcol; j <= lastcol; j++) {
    828             psiaipriv = psiaipriv + 0.5 * t1a[j];
    829         }
    830     }
    831     if (gp[procid].neighbors[LEFT] == -1) {
    832         for (j = firstrow; j <= lastrow; j++) {
    833             psiaipriv = psiaipriv + 0.5 * t2a[j][0];
    834         }
    835     }
    836     if (gp[procid].neighbors[RIGHT] == -1) {
    837         for (j = firstrow; j <= lastrow; j++) {
    838             psiaipriv = psiaipriv + 0.5 * t2a[j][jm - 1];
    839         }
    840     }
    841     for (i = firstrow; i <= lastrow; i++) {
    842         t1a = (double *) t2a[i];
    843         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    844             psiaipriv = psiaipriv + t1a[iindex];
    845         }
    846     }
    847 
    848 /* after computing its private sum, every process adds that to the
    849    shared running sum psiai  */
    850 
    851     LOCK(locks->psiailock)
    852     global->psiai = global->psiai + psiaipriv;
    853     UNLOCK(locks->psiailock)
    854 
    855     END_PHASE(procid, 7);
    856 
    857 #if defined(MULTIPLE_BARRIERS)
    858     BARRIER(bars->sl_phase_7, nprocs)
    859 #else
    860     BARRIER(bars->barrier, nprocs)
    861 #endif
    862    
    863 /*      *******************************************************
    864 
    865                 e i g h t h   p h a s e
    866 
    867         *******************************************************
    868 
    869    augment ga(i,j) with [-psiai/psibi]*psib(i,j) */
    870 
    871     START_PHASE(procid, 8);
    872 
    873     f4 = (-global->psiai) /(global->psibi);
    874 
    875     t2a = (double **) ga[procid];
    876     t2b = (double **) psib[procid];
    877     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    878         t2a[0][0] = t2a[0][0] + f4 * t2b[0][0];
    879     }
    880     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    881         t2a[im - 1][0] = t2a[im - 1][0] + f4 * t2b[im - 1][0];
    882     }
    883     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    884         t2a[0][jm - 1] = t2a[0][jm - 1] + f4 * t2b[0][jm - 1];
    885     }
    886     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    887         t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f4 * t2b[im - 1][jm - 1];
    888     }
    889     if (gp[procid].neighbors[UP] == -1) {
    890         t1a = (double *) t2a[0];
    891         t1b = (double *) t2b[0];
    892         for (j = firstcol; j <= lastcol; j++) {
    893             t1a[j] = t1a[j] + f4 * t1b[j];
    894         }
    895     }
    896     if (gp[procid].neighbors[DOWN] == -1) {
    897         t1a = (double *) t2a[im - 1];
    898         t1b = (double *) t2b[im - 1];
    899         for (j = firstcol; j <= lastcol; j++) {
    900             t1a[j] = t1a[j] + f4 * t1b[j];
    901         }
    902     }
    903     if (gp[procid].neighbors[LEFT] == -1) {
    904         for (j = firstrow; j <= lastrow; j++) {
    905             t2a[j][0] = t2a[j][0] + f4 * t2b[j][0];
    906         }
    907     }
    908     if (gp[procid].neighbors[RIGHT] == -1) {
    909         for (j = firstrow; j <= lastrow; j++) {
    910             t2a[j][jm - 1] = t2a[j][jm - 1] + f4 * t2b[j][jm - 1];
    911         }
    912     }
    913     for (i = firstrow; i <= lastrow; i++) {
    914         t1a = (double *) t2a[i];
    915         t1b = (double *) t2b[i];
    916         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    917             t1a[iindex] = t1a[iindex] + f4 * t1b[iindex];
    918         }
    919     }
    920 
    921     t2a = (double **) rhs_multi[procid][numlev - 1];
    922     t2b = (double **) gb[procid];
    923     t2c = (double **) oldgb[procid];
    924     t2d = (double **) q_multi[procid][numlev - 1];
    925     for (i = istart; i <= iend; i++) {
    926         t1a = (double *) t2a[i];
    927         t1b = (double *) t2b[i];
    928         for (j = jstart; j <= jend; j++) {
    929             t1a[j] = t1b[j] * ressqr;
    930         }
    931     }
    932     if (gp[procid].neighbors[UP] == -1) {
    933         t1d = (double *) t2d[0];
    934         t1b = (double *) t2b[0];
    935         for (j = jstart; j <= jend; j++) {
    936             t1d[j] = t1b[j];
    937         }
    938     }
    939     if (gp[procid].neighbors[DOWN] == -1) {
    940         t1d = (double *) t2d[im - 1];
    941         t1b = (double *) t2b[im - 1];
    942         for (j = jstart; j <= jend; j++) {
    943             t1d[j] = t1b[j];
    944         }
    945     }
    946     if (gp[procid].neighbors[LEFT] == -1) {
    947         for (i = istart; i <= iend; i++) {
    948             t2d[i][0] = t2b[i][0];
    949         }
    950     }
    951     if (gp[procid].neighbors[RIGHT] == -1) {
    952         for (i = istart; i <= iend; i++) {
    953             t2d[i][jm - 1] = t2b[i][jm - 1];
    954         }
    955     }
    956     //fac = 1.0 / (4.0 - ressqr*eig2);
    957     for (i = ist; i <= ien; i++) {
    958         t1d = (double *) t2d[i];
    959         t1c = (double *) t2c[i];
    960         for (j = jst; j <= jen; j++) {
    961             t1d[j] = t1c[j];
    962         }
    963     }
    964 
    965     if ((procid == MASTER) || (do_stats)) {
    966         CLOCK(multi_start);
    967     }
    968 
    969     multig(procid);
    970 
    971     if ((procid == MASTER) || (do_stats)) {
    972         CLOCK(multi_end);
    973         (*gp[procid].multi_time) += (multi_end - multi_start);
    974     }
    975 
    976     for (i = istart; i <= iend; i++) {
    977         t1b = (double *) t2b[i];
    978         t1c = (double *) t2c[i];
    979         t1d = (double *) t2d[i];
    980         for (j = jstart; j <= jend; j++) {
    981             t1b[j] = t1d[j];
    982             t1c[j] = t1d[j];
    983         }
    984     }
    985 
    986     END_PHASE(procid, 8);
    987 
    988 #if defined(MULTIPLE_BARRIERS)
    989     BARRIER(bars->sl_phase_8, nprocs)
    990 #else
    991     BARRIER(bars->barrier, nprocs)
    992 #endif
    993 
    994 /*      *******************************************************
    995 
    996                 n i n t h   p h a s e
    997 
    998         *******************************************************
    999 
    1000    put appropriate linear combinations of ga and gb in work2 and work3;
    1001    note that here (as in most cases) the constant multipliers are made
    1002    private variables; the specific order in which things are done is
    1003    chosen in order to hopefully reuse things brought into the cache
    1004 
    1005    note that here again we choose to have all processes share the work
    1006    on both matrices despite the fact that the work done per element
    1007    is the same, because the operand matrices are the same in both cases */
    1008 
    1009     START_PHASE(procid, 9);
    1010 
    1011     t2a = (double **) ga[procid];
    1012     t2b = (double **) gb[procid];
    1013     t2c = (double **) work2[procid];
    1014     t2d = (double **) work3[procid];
    1015     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1016         t2c[0][0] = t2b[0][0] - hh1 * t2a[0][0];
    1017         t2d[0][0] = t2b[0][0] + hh3 * t2a[0][0];
    1018     }
    1019     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1020         t2c[im - 1][0] = t2b[im - 1][0] - hh1 * t2a[im - 1][0];
    1021         t2d[im - 1][0] = t2b[im - 1][0] + hh3 * t2a[im - 1][0];
    1022     }
    1023     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1024         t2c[0][jm - 1] = t2b[0][jm - 1] - hh1 * t2a[0][jm - 1];
    1025         t2d[0][jm - 1] = t2b[0][jm - 1] + hh3 * t2a[0][jm - 1];
    1026     }
    1027     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1028         t2c[im - 1][jm - 1] = t2b[im - 1][jm - 1] - hh1 * t2a[im - 1][jm - 1];
    1029         t2d[im - 1][jm - 1] = t2b[im - 1][jm - 1] + hh3 * t2a[im - 1][jm - 1];
    1030     }
    1031     if (gp[procid].neighbors[UP] == -1) {
    1032         t1a = (double *) t2a[0];
    1033         t1b = (double *) t2b[0];
    1034         t1c = (double *) t2c[0];
    1035         t1d = (double *) t2d[0];
    1036         for (j = firstcol; j <= lastcol; j++) {
    1037             t1d[j] = t1b[j] + hh3 * t1a[j];
    1038             t1c[j] = t1b[j] - hh1 * t1a[j];
    1039         }
    1040     }
    1041     if (gp[procid].neighbors[DOWN] == -1) {
    1042         t1a = (double *) t2a[im - 1];
    1043         t1b = (double *) t2b[im - 1];
    1044         t1c = (double *) t2c[im - 1];
    1045         t1d = (double *) t2d[im - 1];
    1046         for (j = firstcol; j <= lastcol; j++) {
    1047             t1d[j] = t1b[j] + hh3 * t1a[j];
    1048             t1c[j] = t1b[j] - hh1 * t1a[j];
    1049         }
    1050     }
    1051     if (gp[procid].neighbors[LEFT] == -1) {
    1052         for (j = firstrow; j <= lastrow; j++) {
    1053             t2d[j][0] = t2b[j][0] + hh3 * t2a[j][0];
    1054             t2c[j][0] = t2b[j][0] - hh1 * t2a[j][0];
    1055         }
    1056     }
    1057     if (gp[procid].neighbors[RIGHT] == -1) {
    1058         for (j = firstrow; j <= lastrow; j++) {
    1059             t2d[j][jm - 1] = t2b[j][jm - 1] + hh3 * t2a[j][jm - 1];
    1060             t2c[j][jm - 1] = t2b[j][jm - 1] - hh1 * t2a[j][jm - 1];
    1061         }
    1062     }
    1063 
    1064     for (i = firstrow; i <= lastrow; i++) {
    1065         t1a = (double *) t2a[i];
    1066         t1b = (double *) t2b[i];
    1067         t1c = (double *) t2c[i];
    1068         t1d = (double *) t2d[i];
    1069         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    1070             t1d[iindex] = t1b[iindex] + hh3 * t1a[iindex];
    1071             t1c[iindex] = t1b[iindex] - hh1 * t1a[iindex];
    1072         }
    1073     }
    1074 
    1075     END_PHASE(procid, 9);
    1076 
    1077 #if defined(MULTIPLE_BARRIERS)
    1078     BARRIER(bars->sl_phase_9, nprocs)
    1079 #else
    1080     BARRIER(bars->barrier, nprocs)
    1081 #endif
    1082 
    1083 /*      *******************************************************
    1084 
    1085                 t e n t h    p h a s e
    1086 
    1087         *******************************************************/
    1088 
    1089     START_PHASE(procid, 10);
    1090     timst = 2 * dtau;
    1091 
    1092 /* update the psi{1,3} matrices by adding 2*dtau*work3 to each */
    1093 
    1094     t2a = (double **) psi[procid][0];
    1095     t2b = (double **) work3[procid];
    1096     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1097         t2a[0][0] = t2a[0][0] + timst * t2b[0][0];
    1098     }
    1099     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1100         t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];
    1101     }
    1102     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1103         t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];
    1104     }
    1105     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1106         t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];
    1107     }
    1108     if (gp[procid].neighbors[UP] == -1) {
    1109         t1a = (double *) t2a[0];
    1110         t1b = (double *) t2b[0];
    1111         for (j = firstcol; j <= lastcol; j++) {
    1112             t1a[j] = t1a[j] + timst * t1b[j];
    1113         }
    1114     }
    1115     if (gp[procid].neighbors[DOWN] == -1) {
    1116         t1a = (double *) t2a[im - 1];
    1117         t1b = (double *) t2b[im - 1];
    1118         for (j = firstcol; j <= lastcol; j++) {
    1119             t1a[j] = t1a[j] + timst * t1b[j];
    1120         }
    1121     }
    1122     if (gp[procid].neighbors[LEFT] == -1) {
    1123         for (j = firstrow; j <= lastrow; j++) {
    1124             t2a[j][0] = t2a[j][0] + timst * t2b[j][0];
    1125         }
    1126     }
    1127     if (gp[procid].neighbors[RIGHT] == -1) {
    1128         for (j = firstrow; j <= lastrow; j++) {
    1129             t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];
    1130         }
    1131     }
    1132     for (i = firstrow; i <= lastrow; i++) {
    1133         t1a = (double *) t2a[i];
    1134         t1b = (double *) t2b[i];
    1135         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    1136             t1a[iindex] = t1a[iindex] + timst * t1b[iindex];
    1137         }
    1138     }
    1139 
    1140     t2a = (double **) psi[procid][1];
    1141     t2b = (double **) work2[procid];
    1142     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1143         t2a[0][0] = t2a[0][0] + timst * t2b[0][0];
    1144     }
    1145     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
    1146         t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];
    1147     }
    1148     if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1149         t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];
    1150     }
    1151     if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
    1152         t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];
    1153     }
    1154     if (gp[procid].neighbors[UP] == -1) {
    1155         t1a = (double *) t2a[0];
    1156         t1b = (double *) t2b[0];
    1157         for (j = firstcol; j <= lastcol; j++) {
    1158             t1a[j] = t1a[j] + timst * t1b[j];
    1159         }
    1160     }
    1161     if (gp[procid].neighbors[DOWN] == -1) {
    1162         t1a = (double *) t2a[im - 1];
    1163         t1b = (double *) t2b[im - 1];
    1164         for (j = firstcol; j <= lastcol; j++) {
    1165             t1a[j] = t1a[j] + timst * t1b[j];
    1166         }
    1167     }
    1168     if (gp[procid].neighbors[LEFT] == -1) {
    1169         for (j = firstrow; j <= lastrow; j++) {
    1170             t2a[j][0] = t2a[j][0] + timst * t2b[j][0];
    1171         }
    1172     }
    1173     if (gp[procid].neighbors[RIGHT] == -1) {
    1174         for (j = firstrow; j <= lastrow; j++) {
    1175             t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];
    1176         }
    1177     }
    1178 
    1179     for (i = firstrow; i <= lastrow; i++) {
    1180         t1a = (double *) t2a[i];
    1181         t1b = (double *) t2b[i];
    1182         for (iindex = firstcol; iindex <= lastcol; iindex++) {
    1183             t1a[iindex] = t1a[iindex] + timst * t1b[iindex];
    1184         }
    1185     }
    1186 
    1187     END_PHASE(procid, 10);
    1188 
    1189 #if defined(MULTIPLE_BARRIERS)
    1190     BARRIER(bars->sl_phase_10, nprocs)
    1191 #else
    1192     BARRIER(bars->barrier, nprocs)
    1193 #endif
    1194 }
Note: See TracChangeset for help on using the changeset viewer.