/*************************************************************************/
/*                                                                       */
/*  Copyright (c) 1994 Stanford University                               */
/*                                                                       */
/*  All rights reserved.                                                 */
/*                                                                       */
/*  Permission is given to use, copy, and modify this software for any   */
/*  non-commercial purpose as long as this copyright notice is not       */
/*  removed.  All other uses, including redistribution in whole or in    */
/*  part, are forbidden without prior written permission.                */
/*                                                                       */
/*  This software is provided with absolutely no warranty and no         */
/*  support.                                                             */
/*                                                                       */
/*************************************************************************/

/*    ****************
      subroutine slave
      ****************  */

#include <stdio.h>
#include <math.h>

#include "decs.h"

#define VERBOSE  0

/////////////////////////////////////////////////////////////
__attribute__ ((constructor)) void slave( long *ptr_procid )
/////////////////////////////////////////////////////////////
{
    long       i;
    long       j;
    long       nstep;
    long       iindex;
    long       iday = 0;
	double     ysca1;
    double     y;
    double     factor;
    double     sintemp;
    double     curlt;
    double     ressqr;
    long       istart;
    long       iend;
    long       jstart;
    long       jend;
    long       ist;
    long       ien;
    long       jst;
    long       jen;
    double     fac;
    long       dayflag = 0;
    long       dhourflag = 0;
    long       endflag = 0;
    long       firstrow;
    long       lastrow;
    long       numrows;
    long       firstcol;
    long       lastcol;
    long       numcols;
    long       psiindex;
    double     psibipriv;
    double     ttime;
    double     dhour;
    double     day;
    long       procid;
    long       j_off = 0;
    double **  t2a;
    double **  t2b;
    double *   t1a;
    double *   t1b;
    double *   t1c;
    double *   t1d;
    long       barrier_start;

    procid = *ptr_procid;

    // initialise total time in slave()   
    gps[procid]->total_time = giet_proctime();

    ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];

    // BARRIER
    barrier_start = giet_proctime();
    sqt_barrier_wait( &barrier );
    gps[procid]->sync_time += (giet_proctime() - barrier_start);

if ( VERBOSE ) { printf("\n@@@ Thread %d pass first barrier in slave()\n", procid ); }

    t2a = (double **) oldga[procid];
    t2b = (double **) oldgb[procid];
    for (i = 0; i < im; i++) 
    {
        t1a = (double *) t2a[i];
        t1b = (double *) t2b[i];
        for (j = 0; j < jm; j++) 
        {
            t1a[j] = 0.0;
            t1b[j] = 0.0;
        }
    }

    firstcol = 1;
    lastcol = firstcol + gps[procid]->rel_num_x[numlev - 1] - 1;
    firstrow = 1;
    lastrow = firstrow + gps[procid]->rel_num_y[numlev - 1] - 1;

    numcols = gps[procid]->rel_num_x[numlev - 1];
    numrows = gps[procid]->rel_num_y[numlev - 1];
    j_off = (gps[procid]->colnum) * numcols;

/* every process gets its own copy of the timing variables to avoid
   contention at shared memory locations.  here, these variables
   are initialized.  */

    ttime = 0.0;
    dhour = 0.0;
    nstep = 0;
    day   = 0.0;
    ysca1 = 0.5 * ysca;

    if (procid == MASTER) 
    {
        t1a = (double *) f;
        for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) 
        {
            y = ((double) iindex) * res;
            t1a[iindex] = f0 + beta * (y - ysca1);
        }
    }

    t2a = (double **) psium[procid];
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[0][0] = 0.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[im - 1][0] = 0.0;
    }
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[0][jm - 1] = 0.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[im - 1][jm - 1] = 0.0;
    }
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 0.0;
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 0.0;
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][0] = 0.0;
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][jm - 1] = 0.0;
        }
    }

    for (i = firstrow; i <= lastrow; i++) 
    {
        t1a = (double *) t2a[i];
        for (iindex = firstcol; iindex <= lastcol; iindex++) 
        {
            t1a[iindex] = 0.0;
        }
    }
    t2a = (double **) psilm[procid];
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[0][0] = 0.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[im - 1][0] = 0.0;
    }
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[0][jm - 1] = 0.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[im - 1][jm - 1] = 0.0;
    }
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 0.0;
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 0.0;
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][0] = 0.0;
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][jm - 1] = 0.0;
        }
    }
    for (i = firstrow; i <= lastrow; i++) 
    {
        t1a = (double *) t2a[i];
        for (iindex = firstcol; iindex <= lastcol; iindex++) 
        {
            t1a[iindex] = 0.0;
        }
    }

    t2a = (double **) psib[procid];
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[0][0] = 1.0;
    }
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[0][jm - 1] = 1.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[im - 1][0] = 1.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        t2a[im - 1][jm - 1] = 1.0;
    }
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 1.0;
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        for (j = firstcol; j <= lastcol; j++) 
        {
            t1a[j] = 1.0;
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][0] = 1.0;
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][jm - 1] = 1.0;
        }
    }
    for (i = firstrow; i <= lastrow; i++) 
    {
        t1a = (double *) t2a[i];
        for (iindex = firstcol; iindex <= lastcol; iindex++) 
        {
            t1a[iindex] = 0.0;
        }
    }

    // BARRIER
    barrier_start = giet_proctime();
    sqt_barrier_wait( &barrier );
    gps[procid]->sync_time += (giet_proctime() - barrier_start);

if ( VERBOSE ) { printf("\n@@@ Thread %d pass second barrier in slave()\n", procid ); }

    /* compute psib array (one-time computation) and integrate into psibi */
        istart = 1;
    iend = istart + gps[procid]->rel_num_y[numlev - 1] - 1;
    jstart = 1;
    jend = jstart + gps[procid]->rel_num_x[numlev - 1] - 1;
    ist = istart;
    ien = iend;
    jst = jstart;
    jen = jend;

    if (gps[procid]->neighbors[UP] == -1) 
    {
        istart = 0;
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        jstart = 0;
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        iend = im - 1;
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        jend = jm - 1;
    }

    t2a = (double **) rhs_multi[procid][numlev - 1];
    t2b = (double **) psib[procid];
    for (i = istart; i <= iend; i++) 
    {
        t1a = (double *) t2a[i];
        t1b = (double *) t2b[i];
        for (j = jstart; j <= jend; j++) 
        {
            t1a[j] = t1b[j] * ressqr;
        }
    }
    t2a = (double **) q_multi[procid][numlev - 1];
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        t1b = (double *) t2b[0];
        for (j = jstart; j <= jend; j++)
        {
            t1a[j] = t1b[j];
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        t1b = (double *) t2b[im - 1];
        for (j = jstart; j <= jend; j++) 
        {
            t1a[j] = t1b[j];
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (i = istart; i <= iend; i++) 
        {
            t2a[i][0] = t2b[i][0];
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        for (i = istart; i <= iend; i++) 
        {
            t2a[i][jm - 1] = t2b[i][jm - 1];
        }
    }
    
    // BARRIER
    barrier_start = giet_proctime();
    sqt_barrier_wait( &barrier );
    gps[procid]->sync_time += (giet_proctime() - barrier_start);
    
if ( VERBOSE ) { printf("\n@@@ Thread %d pass third barrier in slave()\n", procid ); }

    t2a = (double **) psib[procid];
    j = gps[procid]->neighbors[UP];
    if (j != -1) 
    {
        t1a = (double *) t2a[0];
        t1b = (double *) psib[j][im - 2];
        for (i = 1; i < jm - 1; i++) 
        {
            t1a[i] = t1b[i];
        }
    }
    j = gps[procid]->neighbors[DOWN];
    if (j != -1) 
    {
        t1a = (double *) t2a[im - 1];
        t1b = (double *) psib[j][1];
        for (i = 1; i < jm - 1; i++) {
            t1a[i] = t1b[i];
        }
    }
    j = gps[procid]->neighbors[LEFT];
    if (j != -1) 
    {
        t2b = (double **) psib[j];
        for (i = 1; i < im - 1; i++) 
        {
            t2a[i][0] = t2b[i][jm - 2];
        }
    }
    j = gps[procid]->neighbors[RIGHT];
    if (j != -1) 
    {
        t2b = (double **) psib[j];
        for (i = 1; i < im - 1; i++) 
        {
            t2a[i][jm - 1] = t2b[i][1];
        }
    }

    t2a = (double **) q_multi[procid][numlev - 1];
    t2b = (double **) psib[procid];
    fac = 1.0 / (4.0 - ressqr * eig2);
    for (i = ist; i <= ien; i++) {
        t1a = (double *) t2a[i];
        t1b = (double *) t2b[i];
        t1c = (double *) t2b[i - 1];
        t1d = (double *) t2b[i + 1];
        for (j = jst; j <= jen; j++) {
            t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]);
        }
    }

if ( VERBOSE ) { printf("\n@@@ Thread %d in slave() call multi\n", procid ); }

    multig(procid);

    for (i = istart; i <= iend; i++) 
    {
        t1a = (double *) t2a[i];
        t1b = (double *) t2b[i];
        for (j = jstart; j <= jend; j++) 
        {
            t1b[j] = t1a[j];
        }
    }
    
    // BARRIER
    barrier_start = giet_proctime();
    sqt_barrier_wait( &barrier );
    gps[procid]->sync_time += (giet_proctime() - barrier_start);
    
if ( VERBOSE ) { printf("\n@@@ Thread %d pass fourth barrier in slave()\n", procid ); }

/* update the local running sum psibipriv by summing all the resulting
   values in that process's share of the psib matrix   */
   
    t2a = (double **) psib[procid];
    psibipriv = 0.0;
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        psibipriv = psibipriv + 0.25 * (t2a[0][0]);
    }
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]);
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]);
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]);
    }
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        for (j = firstcol; j <= lastcol; j++) 
        {
            psibipriv = psibipriv + 0.5 * t1a[j];
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        for (j = firstcol; j <= lastcol; j++) 
        {
            psibipriv = psibipriv + 0.5 * t1a[j];
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            psibipriv = psibipriv + 0.5 * t2a[j][0];
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            psibipriv = psibipriv + 0.5 * t2a[j][jm - 1];
        }
    }
    for (i = firstrow; i <= lastrow; i++) 
    {
        t1a = (double *) t2a[i];
        for (iindex = firstcol; iindex <= lastcol; iindex++) 
        {
            psibipriv = psibipriv + t1a[iindex];
        }
    }

/* update the shared variable psibi by summing all the psibiprivs
   of the individual processes into it.  note that this combined
   private and shared sum method avoids accessing the shared
   variable psibi once for every element of the matrix.  */

    sqt_lock_acquire( &psibi_lock );
    global->psibi = global->psibi + psibipriv;
    sqt_lock_release( &psibi_lock );

/* initialize psim matrices

   if there is more than one process, then split the processes
   between the two psim matrices; otherwise, let the single process
   work on one first and then the other   */

    for (psiindex = 0; psiindex <= 1; psiindex++) 
    {
        t2a = (double **) psim[procid][psiindex];
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[0][0] = 0.0;
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[im - 1][0] = 0.0;
        }
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[0][jm - 1] = 0.0;
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[im - 1][jm - 1] = 0.0;
        }
        if (gps[procid]->neighbors[UP] == -1) 
        {
            t1a = (double *) t2a[0];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = 0.0;
            }
        }
        if (gps[procid]->neighbors[DOWN] == -1) 
        {
            t1a = (double *) t2a[im - 1];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = 0.0;
            }
        }
        if (gps[procid]->neighbors[LEFT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][0] = 0.0;
            }
        }
        if (gps[procid]->neighbors[RIGHT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][jm - 1] = 0.0;
            }
        }
        for (i = firstrow; i <= lastrow; i++) 
        {
            t1a = (double *) t2a[i];
            for (iindex = firstcol; iindex <= lastcol; iindex++)
            {
                t1a[iindex] = 0.0;
            }
        }
    }

/* initialize psi matrices the same way  */

    for (psiindex = 0; psiindex <= 1; psiindex++) 
    {
        t2a = (double **) psi[procid][psiindex];
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[0][0] = 0.0;
        }
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[0][jm - 1] = 0.0;
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[im - 1][0] = 0.0;
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[im - 1][jm - 1] = 0.0;
        }
        if (gps[procid]->neighbors[UP] == -1) 
        {
            t1a = (double *) t2a[0];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = 0.0;
            }
        }
        if (gps[procid]->neighbors[DOWN] == -1) 
        {
            t1a = (double *) t2a[im - 1];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = 0.0;
            }
        }
        if (gps[procid]->neighbors[LEFT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][0] = 0.0;
            }
        }
        if (gps[procid]->neighbors[RIGHT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][jm - 1] = 0.0;
            }
        }
        for (i = firstrow; i <= lastrow; i++) 
        {
            t1a = (double *) t2a[i];
            for (iindex = firstcol; iindex <= lastcol; iindex++) 
            {
                t1a[iindex] = 0.0;
            }
        }
    }

/* compute input curl of wind stress */

    t2a = (double **) tauz[procid];
    ysca1 = .5 * ysca;
    factor = -dt0 * pi / ysca1;
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[0][0] = 0.0;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
    {
        t2a[im - 1][0] = 0.0;
    }
    if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
        sintemp = sin(sintemp);
        t2a[0][jm - 1] = factor * sintemp;
    }
    if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
    {
        sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
        sintemp = sin(sintemp);
        t2a[im - 1][jm - 1] = factor * sintemp;
    }
    if (gps[procid]->neighbors[UP] == -1) 
    {
        t1a = (double *) t2a[0];
        for (j = firstcol; j <= lastcol; j++) 
        {
            sintemp = pi * ((double) j + j_off) * res / ysca1;
            sintemp = sin(sintemp);
            curlt = factor * sintemp;
            t1a[j] = curlt;
        }
    }
    if (gps[procid]->neighbors[DOWN] == -1) 
    {
        t1a = (double *) t2a[im - 1];
        for (j = firstcol; j <= lastcol; j++) 
        {
            sintemp = pi * ((double) j + j_off) * res / ysca1;
            sintemp = sin(sintemp);
            curlt = factor * sintemp;
            t1a[j] = curlt;
        }
    }
    if (gps[procid]->neighbors[LEFT] == -1) 
    {
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][0] = 0.0;
        }
    }
    if (gps[procid]->neighbors[RIGHT] == -1) 
    {
        sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
        sintemp = sin(sintemp);
        curlt = factor * sintemp;
        for (j = firstrow; j <= lastrow; j++) 
        {
            t2a[j][jm - 1] = curlt;
        }
    }
    for (i = firstrow; i <= lastrow; i++) 
    {
        t1a = (double *) t2a[i];
        for (iindex = firstcol; iindex <= lastcol; iindex++) 
        {
            sintemp = pi * ((double) iindex + j_off) * res / ysca1;
            sintemp = sin(sintemp);
            curlt = factor * sintemp;
            t1a[iindex] = curlt;
        }
    }
    
    // BARRIER
    barrier_start = giet_proctime();
    sqt_barrier_wait( &barrier );
    gps[procid]->sync_time += (giet_proctime() - barrier_start);
    
if ( VERBOSE ) { printf("\n@@@ Thread %d pass fifth barrier in slave()\n", procid ); }

/***************************************************************
 one-time stuff over at this point
 ***************************************************************/

    while (!endflag) 
    {
        while ((!dayflag) || (!dhourflag)) 
        {
            dayflag = 0;
            dhourflag = 0;

if ( VERBOSE ) { printf("\n@@@ Thread %d call slave2() : step %d\n", procid, nstep ); }

            slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols);

/* update time and step number
   note that these time and step variables are private i.e. every
   process has its own copy and keeps track of its own time  */

            ttime = ttime + dtau;
            nstep = nstep + 1;
            day = ttime / 86400.0;

            if (day > ((double) outday0)) 
            {
                dayflag = 1;
                iday = (long) day;
                dhour = dhour + dtau;
                if (dhour >= 86400.0) 
                {
                    dhourflag = 1;
                }
            }
        }  // end while
        dhour = 0.0;

        t2a = (double **) psium[procid];
        t2b = (double **) psim[procid][0];
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[0][0] = t2a[0][0] + t2b[0][0];
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
        }
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
        }
        if (gps[procid]->neighbors[UP] == -1) 
        {
            t1a = (double *) t2a[0];
            t1b = (double *) t2b[0];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = t1a[j] + t1b[j];
            }
        }
        if (gps[procid]->neighbors[DOWN] == -1) 
        {
            t1a = (double *) t2a[im - 1];
            t1b = (double *) t2b[im - 1];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = t1a[j] + t1b[j];
            }
        }
        if (gps[procid]->neighbors[LEFT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][0] = t2a[j][0] + t2b[j][0];
            }
        }
        if (gps[procid]->neighbors[RIGHT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
            }
        }
        for (i = firstrow; i <= lastrow; i++) 
        {
            t1a = (double *) t2a[i];
            t1b = (double *) t2b[i];
            for (iindex = firstcol; iindex <= lastcol; iindex++) 
            {
                t1a[iindex] = t1a[iindex] + t1b[iindex];
            }
        }

/* update values of psilm array to psilm + psim[2]  */

        t2a = (double **) psilm[procid];
        t2b = (double **) psim[procid][1];
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[0][0] = t2a[0][0] + t2b[0][0];
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) 
        {
            t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
        }
        if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
        }
        if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) 
        {
            t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
        }
        if (gps[procid]->neighbors[UP] == -1) 
        {
            t1a = (double *) t2a[0];
            t1b = (double *) t2b[0];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = t1a[j] + t1b[j];
            }
        }
        if (gps[procid]->neighbors[DOWN] == -1) 
        {
            t1a = (double *) t2a[im - 1];
            t1b = (double *) t2b[im - 1];
            for (j = firstcol; j <= lastcol; j++) 
            {
                t1a[j] = t1a[j] + t1b[j];
            }
        }
        if (gps[procid]->neighbors[LEFT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][0] = t2a[j][0] + t2b[j][0];
            }
        }
        if (gps[procid]->neighbors[RIGHT] == -1) 
        {
            for (j = firstrow; j <= lastrow; j++) 
            {
                t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
            }
        }
        for (i = firstrow; i <= lastrow; i++) 
        {
            t1a = (double *) t2a[i];
            t1b = (double *) t2b[i];
            for (iindex = firstcol; iindex <= lastcol; iindex++) 
            {
                t1a[iindex] = t1a[iindex] + t1b[iindex];
            }
        }
        if (iday >= (long) outday3) 
        {
            endflag = 1;
        }
    }  // end while endflag

    gps[procid]->total_time = giet_proctime() - (gps[procid]->total_time);

    if ( procid != MASTER ) giet_pthread_exit("slave completed");

}  // end slave()
