Context Navigation

source: soft/giet_vm/applications/ocean/slave2.C @ 792

Last change on this file since 792 was 598, checked in by guerin, 10 years ago
ocean: fix app broken by r589
File size: 40.1 KB

Rev	Line
[598]	1	/*************************************************************************/
	2	/* */
	3	/* Copyright (c) 1994 Stanford University */
	4	/* */
	5	/* All rights reserved. */
	6	/* */
	7	/* Permission is given to use, copy, and modify this software for any */
	8	/* non-commercial purpose as long as this copyright notice is not */
	9	/* removed. All other uses, including redistribution in whole or in */
	10	/* part, are forbidden without prior written permission. */
	11	/* */
	12	/* This software is provided with absolutely no warranty and no */
	13	/* support. */
	14	/* */
	15	/*************************************************************************/
[581]	16
[598]	17	/* ****************
	18	subroutine slave2
	19	**************** */
	20
	21	EXTERN_ENV
	22
	23	#include <stdio.h>
	24	#include <math.h>
	25	#include <stdlib.h>
	26
	27	#include "decs.h"
	28
	29	void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols)
	30	{
	31	long i;
	32	long j;
	33	long iindex;
	34	double hh1;
	35	double hh3;
	36	double hinv;
	37	double h1inv;
	38	long istart;
	39	long iend;
	40	long jstart;
	41	long jend;
	42	long ist;
	43	long ien;
	44	long jst;
	45	long jen;
	46	double ressqr;
	47	double psiaipriv;
	48	double f4;
	49	double timst;
	50	long psiindex;
	51	long i_off;
	52	long j_off;
	53	long multi_start;
	54	long multi_end;
	55	double **t2a;
	56	double **t2b;
	57	double **t2c;
	58	double **t2d;
	59	double **t2e;
	60	double **t2f;
	61	double **t2g;
	62	double **t2h;
	63	double *t1a;
	64	double *t1b;
	65	double *t1c;
	66	double *t1d;
	67	double *t1e;
	68	double *t1f;
	69	double *t1g;
	70	double *t1h;
	71
	72	ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];
	73	i_off = (gp[procid].rownum) numrows;
	74	j_off = (gp[procid].colnum) numcols;
	75
	76	START_PHASE(procid, 1);
	77
	78	/* ***************************************************************
	79
	80	f i r s t p h a s e (of timestep calculation)
	81
	82	***************************************************************/
	83
	84	t2a = (double **) ga[procid];
	85	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	86	t2a[0][0] = 0.0;
	87	}
	88	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	89	t2a[im - 1][0] = 0.0;
	90	}
	91	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	92	t2a[0][jm - 1] = 0.0;
	93	}
	94	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	95	t2a[im - 1][jm - 1] = 0.0;
	96	}
	97	if (gp[procid].neighbors[UP] == -1) {
	98	t1a = (double *) t2a[0];
	99	for (j = firstcol; j <= lastcol; j++) {
	100	t1a[j] = 0.0;
	101	}
	102	}
	103	if (gp[procid].neighbors[DOWN] == -1) {
	104	t1a = (double *) t2a[im - 1];
	105	for (j = firstcol; j <= lastcol; j++) {
	106	t1a[j] = 0.0;
	107	}
	108	}
	109	if (gp[procid].neighbors[LEFT] == -1) {
	110	for (j = firstrow; j <= lastrow; j++) {
	111	t2a[j][0] = 0.0;
	112	}
	113	}
	114	if (gp[procid].neighbors[RIGHT] == -1) {
	115	for (j = firstrow; j <= lastrow; j++) {
	116	t2a[j][jm - 1] = 0.0;
	117	}
	118	}
	119	for (i = firstrow; i <= lastrow; i++) {
	120	t1a = (double *) t2a[i];
	121	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	122	t1a[iindex] = 0.0;
	123	}
	124	}
	125
	126	t2a = (double **) gb[procid];
	127	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	128	t2a[0][0] = 0.0;
	129	}
	130	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	131	t2a[im - 1][0] = 0.0;
	132	}
	133	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	134	t2a[0][jm - 1] = 0.0;
	135	}
	136	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	137	t2a[im - 1][jm - 1] = 0.0;
	138	}
	139	if (gp[procid].neighbors[UP] == -1) {
	140	t1a = (double *) t2a[0];
	141	for (j = firstcol; j <= lastcol; j++) {
	142	t1a[j] = 0.0;
	143	}
	144	}
	145	if (gp[procid].neighbors[DOWN] == -1) {
	146	t1a = (double *) t2a[im - 1];
	147	for (j = firstcol; j <= lastcol; j++) {
	148	t1a[j] = 0.0;
	149	}
	150	}
	151	if (gp[procid].neighbors[LEFT] == -1) {
	152	for (j = firstrow; j <= lastrow; j++) {
	153	t2a[j][0] = 0.0;
	154	}
	155	}
	156	if (gp[procid].neighbors[RIGHT] == -1) {
	157	for (j = firstrow; j <= lastrow; j++) {
	158	t2a[j][jm - 1] = 0.0;
	159	}
	160	}
	161	for (i = firstrow; i <= lastrow; i++) {
	162	t1a = (double *) t2a[i];
	163	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	164	t1a[iindex] = 0.0;
	165	}
	166	}
	167
	168	/* put the laplacian of psi{1,3} in work1{1,2}
	169	note that psi(i,j,2) represents the psi3 array in
	170	the original equations */
	171
	172	for (psiindex = 0; psiindex <= 1; psiindex++) {
	173	t2a = (double **) work1[procid][psiindex];
	174	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	175	t2a[0][0] = 0;
	176	}
	177	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	178	t2a[im - 1][0] = 0;
	179	}
	180	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	181	t2a[0][jm - 1] = 0;
	182	}
	183	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	184	t2a[im - 1][jm - 1] = 0;
	185	}
	186	laplacalc(procid, psi, work1, psiindex, firstrow, lastrow, firstcol, lastcol);
	187	}
	188
	189	/* set values of work2 array to psi1 - psi3 */
	190
	191	t2a = (double **) work2[procid];
	192	t2b = (double **) psi[procid][0];
	193	t2c = (double **) psi[procid][1];
	194	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	195	t2a[0][0] = t2b[0][0] - t2c[0][0];
	196	}
	197	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	198	t2a[im - 1][0] = t2b[im - 1][0] - t2c[im - 1][0];
	199	}
	200	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	201	t2a[0][jm - 1] = t2b[0][jm - 1] - t2c[0][jm - 1];
	202	}
	203	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	204	t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1] - t2c[im - 1][jm - 1];
	205	}
	206	if (gp[procid].neighbors[UP] == -1) {
	207	t1a = (double *) t2a[0];
	208	t1b = (double *) t2b[0];
	209	t1c = (double *) t2c[0];
	210	for (j = firstcol; j <= lastcol; j++) {
	211	t1a[j] = t1b[j] - t1c[j];
	212	}
	213	}
	214	if (gp[procid].neighbors[DOWN] == -1) {
	215	t1a = (double *) t2a[im - 1];
	216	t1b = (double *) t2b[im - 1];
	217	t1c = (double *) t2c[im - 1];
	218	for (j = firstcol; j <= lastcol; j++) {
	219	t1a[j] = t1b[j] - t1c[j];
	220	}
	221	}
	222	if (gp[procid].neighbors[LEFT] == -1) {
	223	for (j = firstrow; j <= lastrow; j++) {
	224	t2a[j][0] = t2b[j][0] - t2c[j][0];
	225	}
	226	}
	227	if (gp[procid].neighbors[RIGHT] == -1) {
	228	for (j = firstrow; j <= lastrow; j++) {
	229	t2a[j][jm - 1] = t2b[j][jm - 1] - t2c[j][jm - 1];
	230	}
	231	}
	232	for (i = firstrow; i <= lastrow; i++) {
	233	t1a = (double *) t2a[i];
	234	t1b = (double *) t2b[i];
	235	t1c = (double *) t2c[i];
	236	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	237	t1a[iindex] = t1b[iindex] - t1c[iindex];
	238	}
	239	}
	240
	241	/* set values of work3 array to h3/h * psi1 + h1/h * psi3 */
	242
	243	t2a = (double **) work3[procid];
	244	hh3 = h3 / h;
	245	hh1 = h1 / h;
	246	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	247	t2a[0][0] = hh3 * t2a[0][0] + hh1 * t2c[0][0];
	248	}
	249	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	250	t2a[im - 1][0] = hh3 * t2a[im - 1][0] + hh1 * t2c[im - 1][0];
	251	}
	252	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	253	t2a[0][jm - 1] = hh3 * t2a[0][jm - 1] + hh1 * t2c[0][jm - 1];
	254	}
	255	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	256	t2a[im - 1][jm - 1] = hh3 * t2a[im - 1][jm - 1] + hh1 * t2c[im - 1][jm - 1];
	257	}
	258	if (gp[procid].neighbors[UP] == -1) {
	259	for (j = firstcol; j <= lastcol; j++) {
	260	t2a[0][j] = hh3 * t2a[0][j] + hh1 * t2c[0][j];
	261	}
	262	}
	263	if (gp[procid].neighbors[DOWN] == -1) {
	264	for (j = firstcol; j <= lastcol; j++) {
	265	t2a[im - 1][j] = hh3 * t2a[im - 1][j] + hh1 * t2c[im - 1][j];
	266	}
	267	}
	268	if (gp[procid].neighbors[LEFT] == -1) {
	269	for (j = firstrow; j <= lastrow; j++) {
	270	t2a[j][0] = hh3 * t2a[j][0] + hh1 * t2c[j][0];
	271	}
	272	}
	273	if (gp[procid].neighbors[RIGHT] == -1) {
	274	for (j = firstrow; j <= lastrow; j++) {
	275	t2a[j][jm - 1] = hh3 * t2a[j][jm - 1] + hh1 * t2c[j][jm - 1];
	276	}
	277	}
	278	for (i = firstrow; i <= lastrow; i++) {
	279	t1a = (double *) t2a[i];
	280	t1c = (double *) t2c[i];
	281	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	282	t1a[iindex] = hh3 * t1a[iindex] + hh1 * t1c[iindex];
	283	}
	284	}
	285
	286	/* set values of temparray{1,3} to psi{1,3} */
	287
	288	for (psiindex = 0; psiindex <= 1; psiindex++) {
	289	t2a = (double **) temparray[procid][psiindex];
	290	t2b = (double **) psi[procid][psiindex];
	291	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	292	t2a[0][0] = t2b[0][0];
	293	}
	294	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	295	t2a[im - 1][0] = t2b[im - 1][0];
	296	}
	297	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	298	t2a[0][jm - 1] = t2b[0][jm - 1];
	299	}
	300	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	301	t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
	302	}
	303	if (gp[procid].neighbors[UP] == -1) {
	304	for (j = firstcol; j <= lastcol; j++) {
	305	t2a[0][j] = t2b[0][j];
	306	}
	307	}
	308	if (gp[procid].neighbors[DOWN] == -1) {
	309	for (j = firstcol; j <= lastcol; j++) {
	310	t2a[im - 1][j] = t2b[im - 1][j];
	311	}
	312	}
	313	if (gp[procid].neighbors[LEFT] == -1) {
	314	for (j = firstrow; j <= lastrow; j++) {
	315	t2a[j][0] = t2b[j][0];
	316	}
	317	}
	318	if (gp[procid].neighbors[RIGHT] == -1) {
	319	for (j = firstrow; j <= lastrow; j++) {
	320	t2a[j][jm - 1] = t2b[j][jm - 1];
	321	}
	322	}
	323
	324	for (i = firstrow; i <= lastrow; i++) {
	325	t1a = (double *) t2a[i];
	326	t1b = (double *) t2b[i];
	327	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	328	t1a[iindex] = t1b[iindex];
	329	}
	330	}
	331	}
	332
	333	END_PHASE(procid, 1);
	334
	335	#if defined(MULTIPLE_BARRIERS)
	336	BARRIER(bars->sl_phase_1, nprocs)
	337	#else
	338	BARRIER(bars->barrier, nprocs)
	339	#endif
	340
	341	/* *******************************************************
	342
	343	s e c o n d p h a s e
	344
	345	*******************************************************
	346
	347	set values of psi{1,3} to psim{1,3} */
	348
	349	START_PHASE(procid, 2);
	350
	351	for (psiindex = 0; psiindex <= 1; psiindex++) {
	352	t2a = (double **) psi[procid][psiindex];
	353	t2b = (double **) psim[procid][psiindex];
	354	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	355	t2a[0][0] = t2b[0][0];
	356	}
	357	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	358	t2a[0][jm - 1] = t2b[0][jm - 1];
	359	}
	360	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	361	t2a[im - 1][0] = t2b[im - 1][0];
	362	}
	363	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	364	t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
	365	}
	366	if (gp[procid].neighbors[UP] == -1) {
	367	for (j = firstcol; j <= lastcol; j++) {
	368	t2a[0][j] = t2b[0][j];
	369	}
	370	}
	371	if (gp[procid].neighbors[DOWN] == -1) {
	372	for (j = firstcol; j <= lastcol; j++) {
	373	t2a[im - 1][j] = t2b[im - 1][j];
	374	}
	375	}
	376	if (gp[procid].neighbors[LEFT] == -1) {
	377	for (j = firstrow; j <= lastrow; j++) {
	378	t2a[j][0] = t2b[j][0];
	379	}
	380	}
	381	if (gp[procid].neighbors[RIGHT] == -1) {
	382	for (j = firstrow; j <= lastrow; j++) {
	383	t2a[j][jm - 1] = t2b[j][jm - 1];
	384	}
	385	}
	386
	387	for (i = firstrow; i <= lastrow; i++) {
	388	t1a = (double *) t2a[i];
	389	t1b = (double *) t2b[i];
	390	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	391	t1a[iindex] = t1b[iindex];
	392	}
	393	}
	394	}
	395
	396	/* put the laplacian of the psim array
	397	into the work7 array; first part of a three-laplacian
	398	calculation to compute the friction terms */
	399
	400	for (psiindex = 0; psiindex <= 1; psiindex++) {
	401	t2a = (double **) work7[procid][psiindex];
	402	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	403	t2a[0][0] = 0;
	404	}
	405	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	406	t2a[im - 1][0] = 0;
	407	}
	408	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	409	t2a[0][jm - 1] = 0;
	410	}
	411	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	412	t2a[im - 1][jm - 1] = 0;
	413	}
	414	laplacalc(procid, psim, work7, psiindex, firstrow, lastrow, firstcol, lastcol);
	415	}
	416
	417	/* to the values of the work1{1,2} arrays obtained from the
	418	laplacians of psi{1,2} in the previous phase, add to the
	419	elements of every column the corresponding value in the
	420	one-dimenional f array */
	421
	422	for (psiindex = 0; psiindex <= 1; psiindex++) {
	423	t2a = (double **) work1[procid][psiindex];
	424	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	425	t2a[0][0] = t2a[0][0] + f[0];
	426	}
	427	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	428	t2a[im - 1][0] = t2a[im - 1][0] + f[0];
	429	}
	430	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	431	t2a[0][jm - 1] = t2a[0][jm - 1] + f[jmx[numlev - 1] - 1];
	432	}
	433	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	434	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f[jmx[numlev - 1] - 1];
	435	}
	436	if (gp[procid].neighbors[UP] == -1) {
	437	for (j = firstcol; j <= lastcol; j++) {
	438	t2a[0][j] = t2a[0][j] + f[j + j_off];
	439	}
	440	}
	441	if (gp[procid].neighbors[DOWN] == -1) {
	442	for (j = firstcol; j <= lastcol; j++) {
	443	t2a[im - 1][j] = t2a[im - 1][j] + f[j + j_off];
	444	}
	445	}
	446	if (gp[procid].neighbors[LEFT] == -1) {
	447	for (j = firstrow; j <= lastrow; j++) {
	448	t2a[j][0] = t2a[j][0] + f[j + i_off];
	449	}
	450	}
	451	if (gp[procid].neighbors[RIGHT] == -1) {
	452	for (j = firstrow; j <= lastrow; j++) {
	453	t2a[j][jm - 1] = t2a[j][jm - 1] + f[j + i_off];
	454	}
	455	}
	456	for (i = firstrow; i <= lastrow; i++) {
	457	t1a = (double *) t2a[i];
	458	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	459	t1a[iindex] = t1a[iindex] + f[iindex + j_off];
	460	}
	461	}
	462	}
	463
	464	END_PHASE(procid, 2);
	465
	466	#if defined(MULTIPLE_BARRIERS)
	467	BARRIER(bars->sl_phase_2, nprocs)
	468	#else
	469	BARRIER(bars->barrier, nprocs)
	470	#endif
	471	/* *******************************************************
	472
	473	t h i r d p h a s e
	474
	475	*******************************************************
	476
	477	put the jacobian of the work1{1,2} and psi{1,3} arrays
	478	(the latter currently in temparray) in the work5{1,2} arrays */
	479
	480	START_PHASE(procid, 3);
	481
	482	for (psiindex = 0; psiindex <= 1; psiindex++) {
	483	jacobcalc2(work1, temparray, work5, psiindex, procid, firstrow, lastrow, firstcol, lastcol);
	484	}
	485
	486	/* set values of psim{1,3} to temparray{1,3} */
	487
	488	for (psiindex = 0; psiindex <= 1; psiindex++) {
	489	t2a = (double **) psim[procid][psiindex];
	490	t2b = (double **) temparray[procid][psiindex];
	491	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	492	t2a[0][0] = t2b[0][0];
	493	}
	494	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	495	t2a[im - 1][0] = t2b[im - 1][0];
	496	}
	497	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	498	t2a[0][jm - 1] = t2b[0][jm - 1];
	499	}
	500	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	501	t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];
	502	}
	503	if (gp[procid].neighbors[UP] == -1) {
	504	t1a = (double *) t2a[0];
	505	t1b = (double *) t2b[0];
	506	for (j = firstcol; j <= lastcol; j++) {
	507	t1a[j] = t1b[j];
	508	}
	509	}
	510	if (gp[procid].neighbors[DOWN] == -1) {
	511	t1a = (double *) t2a[im - 1];
	512	t1b = (double *) t2b[im - 1];
	513	for (j = firstcol; j <= lastcol; j++) {
	514	t1a[j] = t1b[j];
	515	}
	516	}
	517	if (gp[procid].neighbors[LEFT] == -1) {
	518	for (j = firstrow; j <= lastrow; j++) {
	519	t2a[j][0] = t2b[j][0];
	520	}
	521	}
	522	if (gp[procid].neighbors[RIGHT] == -1) {
	523	for (j = firstrow; j <= lastrow; j++) {
	524	t2a[j][jm - 1] = t2b[j][jm - 1];
	525	}
	526	}
	527	for (i = firstrow; i <= lastrow; i++) {
	528	t1a = (double *) t2a[i];
	529	t1b = (double *) t2b[i];
	530	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	531	t1a[iindex] = t1b[iindex];
	532	}
	533	}
	534	}
	535
	536	/* put the laplacian of the work7{1,2} arrays in the work4{1,2}
	537	arrays; second step in the three-laplacian friction calculation */
	538
	539	for (psiindex = 0; psiindex <= 1; psiindex++) {
	540	laplacalc(procid, work7, work4, psiindex, firstrow, lastrow, firstcol, lastcol);
	541	}
	542
	543	END_PHASE(procid, 3);
	544
	545	#if defined(MULTIPLE_BARRIERS)
	546	BARRIER(bars->sl_phase_3, nprocs)
	547	#else
	548	BARRIER(bars->barrier, nprocs)
	549	#endif
	550
	551	/* *******************************************************
	552
	553	f o u r t h p h a s e
	554
	555	*******************************************************
	556
	557	put the jacobian of the work2 and work3 arrays in the work6
	558	array */
	559
	560	START_PHASE(procid, 4);
	561
	562	jacobcalc(work2, work3, work6, procid, firstrow, lastrow, firstcol, lastcol);
	563
	564	/* put the laplacian of the work4{1,2} arrays in the work7{1,2}
	565	arrays; third step in the three-laplacian friction calculation */
	566
	567	for (psiindex = 0; psiindex <= 1; psiindex++) {
	568	laplacalc(procid, work4, work7, psiindex, firstrow, lastrow, firstcol, lastcol);
	569	}
	570
	571	END_PHASE(procid, 4);
	572
	573	#if defined(MULTIPLE_BARRIERS)
	574	BARRIER(bars->sl_phase_4, nprocs)
	575	#else
	576	BARRIER(bars->barrier, nprocs)
	577	#endif
	578
	579	/* *******************************************************
	580
	581	f i f t h p h a s e
	582
	583	*******************************************************
	584
	585	use the values of the work5, work6 and work7 arrays
	586	computed in the previous time-steps to compute the
	587	ga and gb arrays */
	588
	589	START_PHASE(procid, 5);
	590
	591	hinv = 1.0 / h;
	592	h1inv = 1.0 / h1;
	593
	594	t2a = (double **) ga[procid];
	595	t2b = (double **) gb[procid];
	596	t2c = (double **) work5[procid][0];
	597	t2d = (double **) work5[procid][1];
	598	t2e = (double **) work7[procid][0];
	599	t2f = (double **) work7[procid][1];
	600	t2g = (double **) work6[procid];
	601	t2h = (double **) tauz[procid];
	602	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	603	t2a[0][0] = t2c[0][0] - t2d[0][0] + eig2 * t2g[0][0] + h1inv * t2h[0][0] + lf * t2e[0][0] - lf * t2f[0][0];
	604	t2b[0][0] = hh1 * t2c[0][0] + hh3 * t2d[0][0] + hinv * t2h[0][0] + lf * hh1 * t2e[0][0] + lf * hh3 * t2f[0][0];
	605	}
	606	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	607	t2a[im - 1][0] = t2c[im - 1][0] - t2d[im - 1][0] + eig2 * t2g[im - 1][0] + h1inv * t2h[im - 1][0] + lf * t2e[im - 1][0] - lf * t2f[im - 1][0];
	608	t2b[im - 1][0] = hh1 * t2c[im - 1][0] + hh3 * t2d[im - 1][0] + hinv * t2h[im - 1][0] + lf * hh1 * t2e[im - 1][0] + lf * hh3 * t2f[im - 1][0];
	609	}
	610	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	611	t2a[0][jm - 1] = t2c[0][jm - 1] - t2d[0][jm - 1] + eig2 * t2g[0][jm - 1] + h1inv * t2h[0][jm - 1] + lf * t2e[0][jm - 1] - lf * t2f[0][jm - 1];
	612	t2b[0][jm - 1] = hh1 * t2c[0][jm - 1] + hh3 * t2d[0][jm - 1] + hinv * t2h[0][jm - 1] + lf * hh1 * t2e[0][jm - 1] + lf * hh3 * t2f[0][jm - 1];
	613	}
	614	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	615	t2a[im - 1][jm - 1] = t2c[im - 1][jm - 1] - t2d[im - 1][jm - 1] + eig2 * t2g[im - 1][jm - 1] + h1inv * t2h[im - 1][jm - 1] + lf * t2e[im - 1][jm - 1] - lf * t2f[im - 1][jm - 1];
	616	t2b[im - 1][jm - 1] = hh1 * t2c[im - 1][jm - 1] + hh3 * t2d[im - 1][jm - 1] + hinv * t2h[im - 1][jm - 1] + lf * hh1 * t2e[im - 1][jm - 1] + lf * hh3 * t2f[im - 1][jm - 1];
	617	}
	618	if (gp[procid].neighbors[UP] == -1) {
	619	t1a = (double *) t2a[0];
	620	t1b = (double *) t2b[0];
	621	t1c = (double *) t2c[0];
	622	t1d = (double *) t2d[0];
	623	t1e = (double *) t2e[0];
	624	t1f = (double *) t2f[0];
	625	t1g = (double *) t2g[0];
	626	t1h = (double *) t2h[0];
	627	for (j = firstcol; j <= lastcol; j++) {
	628	t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];
	629	t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];
	630	}
	631	}
	632	if (gp[procid].neighbors[DOWN] == -1) {
	633	t1a = (double *) t2a[im - 1];
	634	t1b = (double *) t2b[im - 1];
	635	t1c = (double *) t2c[im - 1];
	636	t1d = (double *) t2d[im - 1];
	637	t1e = (double *) t2e[im - 1];
	638	t1f = (double *) t2f[im - 1];
	639	t1g = (double *) t2g[im - 1];
	640	t1h = (double *) t2h[im - 1];
	641	for (j = firstcol; j <= lastcol; j++) {
	642	t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];
	643	t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];
	644	}
	645	}
	646	if (gp[procid].neighbors[LEFT] == -1) {
	647	for (j = firstrow; j <= lastrow; j++) {
	648	t2a[j][0] = t2c[j][0] - t2d[j][0] + eig2 * t2g[j][0] + h1inv * t2h[j][0] + lf * t2e[j][0] - lf * t2f[j][0];
	649	t2b[j][0] = hh1 * t2c[j][0] + hh3 * t2d[j][0] + hinv * t2h[j][0] + lf * hh1 * t2e[j][0] + lf * hh3 * t2f[j][0];
	650	}
	651	}
	652	if (gp[procid].neighbors[RIGHT] == -1) {
	653	for (j = firstrow; j <= lastrow; j++) {
	654	t2a[j][jm - 1] = t2c[j][jm - 1] - t2d[j][jm - 1] + eig2 * t2g[j][jm - 1] + h1inv * t2h[j][jm - 1] + lf * t2e[j][jm - 1] - lf * t2f[j][jm - 1];
	655	t2b[j][jm - 1] = hh1 * t2c[j][jm - 1] + hh3 * t2d[j][jm - 1] + hinv * t2h[j][jm - 1] + lf * hh1 * t2e[j][jm - 1] + lf * hh3 * t2f[j][jm - 1];
	656	}
	657	}
	658
	659	for (i = firstrow; i <= lastrow; i++) {
	660	t1a = (double *) t2a[i];
	661	t1b = (double *) t2b[i];
	662	t1c = (double *) t2c[i];
	663	t1d = (double *) t2d[i];
	664	t1e = (double *) t2e[i];
	665	t1f = (double *) t2f[i];
	666	t1g = (double *) t2g[i];
	667	t1h = (double *) t2h[i];
	668	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	669	t1a[iindex] = t1c[iindex] - t1d[iindex] + eig2 * t1g[iindex] + h1inv * t1h[iindex] + lf * t1e[iindex] - lf * t1f[iindex];
	670	t1b[iindex] = hh1 * t1c[iindex] + hh3 * t1d[iindex] + hinv * t1h[iindex] + lf * hh1 * t1e[iindex] + lf * hh3 * t1f[iindex];
	671	}
	672	}
	673
	674	END_PHASE(procid, 5);
	675
	676	#if defined(MULTIPLE_BARRIERS)
	677	BARRIER(bars->sl_phase_5, nprocs)
	678	#else
	679	BARRIER(bars->barrier, nprocs)
	680	#endif
	681
	682	/* *******************************************************
	683
	684	s i x t h p h a s e
	685
	686	******************************************************* */
	687
	688	START_PHASE(procid, 6);
	689
	690	istart = 1;
	691	iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;
	692	jstart = 1;
	693	jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;
	694	ist = istart;
	695	ien = iend;
	696	jst = jstart;
	697	jen = jend;
	698
	699	if (gp[procid].neighbors[UP] == -1) {
	700	istart = 0;
	701	}
	702	if (gp[procid].neighbors[LEFT] == -1) {
	703	jstart = 0;
	704	}
	705	if (gp[procid].neighbors[DOWN] == -1) {
	706	iend = im - 1;
	707	}
	708	if (gp[procid].neighbors[RIGHT] == -1) {
	709	jend = jm - 1;
	710	}
	711	t2a = (double **) rhs_multi[procid][numlev - 1];
	712	t2b = (double **) ga[procid];
	713	t2c = (double **) oldga[procid];
	714	t2d = (double **) q_multi[procid][numlev - 1];
	715	for (i = istart; i <= iend; i++) {
	716	t1a = (double *) t2a[i];
	717	t1b = (double *) t2b[i];
	718	for (j = jstart; j <= jend; j++) {
	719	t1a[j] = t1b[j] * ressqr;
	720	}
	721	}
	722
	723	if (gp[procid].neighbors[UP] == -1) {
	724	t1d = (double *) t2d[0];
	725	t1b = (double *) t2b[0];
	726	for (j = jstart; j <= jend; j++) {
	727	t1d[j] = t1b[j];
	728	}
	729	}
	730	if (gp[procid].neighbors[DOWN] == -1) {
	731	t1d = (double *) t2d[im - 1];
	732	t1b = (double *) t2b[im - 1];
	733	for (j = jstart; j <= jend; j++) {
	734	t1d[j] = t1b[j];
	735	}
	736	}
	737	if (gp[procid].neighbors[LEFT] == -1) {
	738	for (i = istart; i <= iend; i++) {
	739	t2d[i][0] = t2b[i][0];
	740	}
	741	}
	742	if (gp[procid].neighbors[RIGHT] == -1) {
	743	for (i = istart; i <= iend; i++) {
	744	t2d[i][jm - 1] = t2b[i][jm - 1];
	745	}
	746	}
	747	//fac = 1.0 / (4.0 - ressqr*eig2);
	748	for (i = ist; i <= ien; i++) {
	749	t1d = (double *) t2d[i];
	750	t1c = (double *) t2c[i];
	751	for (j = jst; j <= jen; j++) {
	752	t1d[j] = t1c[j];
	753	}
	754	}
	755
	756	if ((procid == MASTER) \|\| (do_stats)) {
	757	CLOCK(multi_start);
	758	}
	759
	760	multig(procid);
	761
	762	if ((procid == MASTER) \|\| (do_stats)) {
	763	CLOCK(multi_end);
	764	(*gp[procid].multi_time) += (multi_end - multi_start);
	765	}
	766
	767	/* the shared sum variable psiai is initialized to 0 at
	768	every time-step */
	769
	770	if (procid == MASTER) {
	771	global->psiai = 0.0;
	772	}
	773
	774	/* copy the solution for use as initial guess in next time-step */
	775
	776	for (i = istart; i <= iend; i++) {
	777	t1b = (double *) t2b[i];
	778	t1c = (double *) t2c[i];
	779	t1d = (double *) t2d[i];
	780	for (j = jstart; j <= jend; j++) {
	781	t1b[j] = t1d[j];
	782	t1c[j] = t1d[j];
	783	}
	784	}
	785
	786	END_PHASE(procid, 6);
	787
	788	#if defined(MULTIPLE_BARRIERS)
	789	BARRIER(bars->sl_phase_6, nprocs)
	790	#else
	791	BARRIER(bars->barrier, nprocs)
	792	#endif
	793
	794	/* *******************************************************
	795
	796	s e v e n t h p h a s e
	797
	798	*******************************************************
	799
	800	every process computes the running sum for its assigned portion
	801	in a private variable psiaipriv */
	802
	803	START_PHASE(procid, 7);
	804
	805	psiaipriv = 0.0;
	806	t2a = (double **) ga[procid];
	807	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	808	psiaipriv = psiaipriv + 0.25 * (t2a[0][0]);
	809	}
	810	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	811	psiaipriv = psiaipriv + 0.25 * (t2a[0][jm - 1]);
	812	}
	813	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	814	psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][0]);
	815	}
	816	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	817	psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][jm - 1]);
	818	}
	819	if (gp[procid].neighbors[UP] == -1) {
	820	t1a = (double *) t2a[0];
	821	for (j = firstcol; j <= lastcol; j++) {
	822	psiaipriv = psiaipriv + 0.5 * t1a[j];
	823	}
	824	}
	825	if (gp[procid].neighbors[DOWN] == -1) {
	826	t1a = (double *) t2a[im - 1];
	827	for (j = firstcol; j <= lastcol; j++) {
	828	psiaipriv = psiaipriv + 0.5 * t1a[j];
	829	}
	830	}
	831	if (gp[procid].neighbors[LEFT] == -1) {
	832	for (j = firstrow; j <= lastrow; j++) {
	833	psiaipriv = psiaipriv + 0.5 * t2a[j][0];
	834	}
	835	}
	836	if (gp[procid].neighbors[RIGHT] == -1) {
	837	for (j = firstrow; j <= lastrow; j++) {
	838	psiaipriv = psiaipriv + 0.5 * t2a[j][jm - 1];
	839	}
	840	}
	841	for (i = firstrow; i <= lastrow; i++) {
	842	t1a = (double *) t2a[i];
	843	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	844	psiaipriv = psiaipriv + t1a[iindex];
	845	}
	846	}
	847
	848	/* after computing its private sum, every process adds that to the
	849	shared running sum psiai */
	850
	851	LOCK(locks->psiailock)
	852	global->psiai = global->psiai + psiaipriv;
	853	UNLOCK(locks->psiailock)
	854
	855	END_PHASE(procid, 7);
	856
	857	#if defined(MULTIPLE_BARRIERS)
	858	BARRIER(bars->sl_phase_7, nprocs)
	859	#else
	860	BARRIER(bars->barrier, nprocs)
	861	#endif
	862
	863	/* *******************************************************
	864
	865	e i g h t h p h a s e
	866
	867	*******************************************************
	868
	869	augment ga(i,j) with [-psiai/psibi]psib(i,j) /
	870
	871	START_PHASE(procid, 8);
	872
	873	f4 = (-global->psiai) /(global->psibi);
	874
	875	t2a = (double **) ga[procid];
	876	t2b = (double **) psib[procid];
	877	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	878	t2a[0][0] = t2a[0][0] + f4 * t2b[0][0];
	879	}
	880	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	881	t2a[im - 1][0] = t2a[im - 1][0] + f4 * t2b[im - 1][0];
	882	}
	883	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	884	t2a[0][jm - 1] = t2a[0][jm - 1] + f4 * t2b[0][jm - 1];
	885	}
	886	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	887	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f4 * t2b[im - 1][jm - 1];
	888	}
	889	if (gp[procid].neighbors[UP] == -1) {
	890	t1a = (double *) t2a[0];
	891	t1b = (double *) t2b[0];
	892	for (j = firstcol; j <= lastcol; j++) {
	893	t1a[j] = t1a[j] + f4 * t1b[j];
	894	}
	895	}
	896	if (gp[procid].neighbors[DOWN] == -1) {
	897	t1a = (double *) t2a[im - 1];
	898	t1b = (double *) t2b[im - 1];
	899	for (j = firstcol; j <= lastcol; j++) {
	900	t1a[j] = t1a[j] + f4 * t1b[j];
	901	}
	902	}
	903	if (gp[procid].neighbors[LEFT] == -1) {
	904	for (j = firstrow; j <= lastrow; j++) {
	905	t2a[j][0] = t2a[j][0] + f4 * t2b[j][0];
	906	}
	907	}
	908	if (gp[procid].neighbors[RIGHT] == -1) {
	909	for (j = firstrow; j <= lastrow; j++) {
	910	t2a[j][jm - 1] = t2a[j][jm - 1] + f4 * t2b[j][jm - 1];
	911	}
	912	}
	913	for (i = firstrow; i <= lastrow; i++) {
	914	t1a = (double *) t2a[i];
	915	t1b = (double *) t2b[i];
	916	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	917	t1a[iindex] = t1a[iindex] + f4 * t1b[iindex];
	918	}
	919	}
	920
	921	t2a = (double **) rhs_multi[procid][numlev - 1];
	922	t2b = (double **) gb[procid];
	923	t2c = (double **) oldgb[procid];
	924	t2d = (double **) q_multi[procid][numlev - 1];
	925	for (i = istart; i <= iend; i++) {
	926	t1a = (double *) t2a[i];
	927	t1b = (double *) t2b[i];
	928	for (j = jstart; j <= jend; j++) {
	929	t1a[j] = t1b[j] * ressqr;
	930	}
	931	}
	932	if (gp[procid].neighbors[UP] == -1) {
	933	t1d = (double *) t2d[0];
	934	t1b = (double *) t2b[0];
	935	for (j = jstart; j <= jend; j++) {
	936	t1d[j] = t1b[j];
	937	}
	938	}
	939	if (gp[procid].neighbors[DOWN] == -1) {
	940	t1d = (double *) t2d[im - 1];
	941	t1b = (double *) t2b[im - 1];
	942	for (j = jstart; j <= jend; j++) {
	943	t1d[j] = t1b[j];
	944	}
	945	}
	946	if (gp[procid].neighbors[LEFT] == -1) {
	947	for (i = istart; i <= iend; i++) {
	948	t2d[i][0] = t2b[i][0];
	949	}
	950	}
	951	if (gp[procid].neighbors[RIGHT] == -1) {
	952	for (i = istart; i <= iend; i++) {
	953	t2d[i][jm - 1] = t2b[i][jm - 1];
	954	}
	955	}
	956	//fac = 1.0 / (4.0 - ressqr*eig2);
	957	for (i = ist; i <= ien; i++) {
	958	t1d = (double *) t2d[i];
	959	t1c = (double *) t2c[i];
	960	for (j = jst; j <= jen; j++) {
	961	t1d[j] = t1c[j];
	962	}
	963	}
	964
	965	if ((procid == MASTER) \|\| (do_stats)) {
	966	CLOCK(multi_start);
	967	}
	968
	969	multig(procid);
	970
	971	if ((procid == MASTER) \|\| (do_stats)) {
	972	CLOCK(multi_end);
	973	(*gp[procid].multi_time) += (multi_end - multi_start);
	974	}
	975
	976	for (i = istart; i <= iend; i++) {
	977	t1b = (double *) t2b[i];
	978	t1c = (double *) t2c[i];
	979	t1d = (double *) t2d[i];
	980	for (j = jstart; j <= jend; j++) {
	981	t1b[j] = t1d[j];
	982	t1c[j] = t1d[j];
	983	}
	984	}
	985
	986	END_PHASE(procid, 8);
	987
	988	#if defined(MULTIPLE_BARRIERS)
	989	BARRIER(bars->sl_phase_8, nprocs)
	990	#else
	991	BARRIER(bars->barrier, nprocs)
	992	#endif
	993
	994	/* *******************************************************
	995
	996	n i n t h p h a s e
	997
	998	*******************************************************
	999
	1000	put appropriate linear combinations of ga and gb in work2 and work3;
	1001	note that here (as in most cases) the constant multipliers are made
	1002	private variables; the specific order in which things are done is
	1003	chosen in order to hopefully reuse things brought into the cache
	1004
	1005	note that here again we choose to have all processes share the work
	1006	on both matrices despite the fact that the work done per element
	1007	is the same, because the operand matrices are the same in both cases */
	1008
	1009	START_PHASE(procid, 9);
	1010
	1011	t2a = (double **) ga[procid];
	1012	t2b = (double **) gb[procid];
	1013	t2c = (double **) work2[procid];
	1014	t2d = (double **) work3[procid];
	1015	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1016	t2c[0][0] = t2b[0][0] - hh1 * t2a[0][0];
	1017	t2d[0][0] = t2b[0][0] + hh3 * t2a[0][0];
	1018	}
	1019	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1020	t2c[im - 1][0] = t2b[im - 1][0] - hh1 * t2a[im - 1][0];
	1021	t2d[im - 1][0] = t2b[im - 1][0] + hh3 * t2a[im - 1][0];
	1022	}
	1023	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1024	t2c[0][jm - 1] = t2b[0][jm - 1] - hh1 * t2a[0][jm - 1];
	1025	t2d[0][jm - 1] = t2b[0][jm - 1] + hh3 * t2a[0][jm - 1];
	1026	}
	1027	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1028	t2c[im - 1][jm - 1] = t2b[im - 1][jm - 1] - hh1 * t2a[im - 1][jm - 1];
	1029	t2d[im - 1][jm - 1] = t2b[im - 1][jm - 1] + hh3 * t2a[im - 1][jm - 1];
	1030	}
	1031	if (gp[procid].neighbors[UP] == -1) {
	1032	t1a = (double *) t2a[0];
	1033	t1b = (double *) t2b[0];
	1034	t1c = (double *) t2c[0];
	1035	t1d = (double *) t2d[0];
	1036	for (j = firstcol; j <= lastcol; j++) {
	1037	t1d[j] = t1b[j] + hh3 * t1a[j];
	1038	t1c[j] = t1b[j] - hh1 * t1a[j];
	1039	}
	1040	}
	1041	if (gp[procid].neighbors[DOWN] == -1) {
	1042	t1a = (double *) t2a[im - 1];
	1043	t1b = (double *) t2b[im - 1];
	1044	t1c = (double *) t2c[im - 1];
	1045	t1d = (double *) t2d[im - 1];
	1046	for (j = firstcol; j <= lastcol; j++) {
	1047	t1d[j] = t1b[j] + hh3 * t1a[j];
	1048	t1c[j] = t1b[j] - hh1 * t1a[j];
	1049	}
	1050	}
	1051	if (gp[procid].neighbors[LEFT] == -1) {
	1052	for (j = firstrow; j <= lastrow; j++) {
	1053	t2d[j][0] = t2b[j][0] + hh3 * t2a[j][0];
	1054	t2c[j][0] = t2b[j][0] - hh1 * t2a[j][0];
	1055	}
	1056	}
	1057	if (gp[procid].neighbors[RIGHT] == -1) {
	1058	for (j = firstrow; j <= lastrow; j++) {
	1059	t2d[j][jm - 1] = t2b[j][jm - 1] + hh3 * t2a[j][jm - 1];
	1060	t2c[j][jm - 1] = t2b[j][jm - 1] - hh1 * t2a[j][jm - 1];
	1061	}
	1062	}
	1063
	1064	for (i = firstrow; i <= lastrow; i++) {
	1065	t1a = (double *) t2a[i];
	1066	t1b = (double *) t2b[i];
	1067	t1c = (double *) t2c[i];
	1068	t1d = (double *) t2d[i];
	1069	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	1070	t1d[iindex] = t1b[iindex] + hh3 * t1a[iindex];
	1071	t1c[iindex] = t1b[iindex] - hh1 * t1a[iindex];
	1072	}
	1073	}
	1074
	1075	END_PHASE(procid, 9);
	1076
	1077	#if defined(MULTIPLE_BARRIERS)
	1078	BARRIER(bars->sl_phase_9, nprocs)
	1079	#else
	1080	BARRIER(bars->barrier, nprocs)
	1081	#endif
	1082
	1083	/* *******************************************************
	1084
	1085	t e n t h p h a s e
	1086
	1087	*******************************************************/
	1088
	1089	START_PHASE(procid, 10);
	1090	timst = 2 * dtau;
	1091
	1092	/* update the psi{1,3} matrices by adding 2dtauwork3 to each */
	1093
	1094	t2a = (double **) psi[procid][0];
	1095	t2b = (double **) work3[procid];
	1096	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1097	t2a[0][0] = t2a[0][0] + timst * t2b[0][0];
	1098	}
	1099	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1100	t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];
	1101	}
	1102	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1103	t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];
	1104	}
	1105	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1106	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];
	1107	}
	1108	if (gp[procid].neighbors[UP] == -1) {
	1109	t1a = (double *) t2a[0];
	1110	t1b = (double *) t2b[0];
	1111	for (j = firstcol; j <= lastcol; j++) {
	1112	t1a[j] = t1a[j] + timst * t1b[j];
	1113	}
	1114	}
	1115	if (gp[procid].neighbors[DOWN] == -1) {
	1116	t1a = (double *) t2a[im - 1];
	1117	t1b = (double *) t2b[im - 1];
	1118	for (j = firstcol; j <= lastcol; j++) {
	1119	t1a[j] = t1a[j] + timst * t1b[j];
	1120	}
	1121	}
	1122	if (gp[procid].neighbors[LEFT] == -1) {
	1123	for (j = firstrow; j <= lastrow; j++) {
	1124	t2a[j][0] = t2a[j][0] + timst * t2b[j][0];
	1125	}
	1126	}
	1127	if (gp[procid].neighbors[RIGHT] == -1) {
	1128	for (j = firstrow; j <= lastrow; j++) {
	1129	t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];
	1130	}
	1131	}
	1132	for (i = firstrow; i <= lastrow; i++) {
	1133	t1a = (double *) t2a[i];
	1134	t1b = (double *) t2b[i];
	1135	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	1136	t1a[iindex] = t1a[iindex] + timst * t1b[iindex];
	1137	}
	1138	}
	1139
	1140	t2a = (double **) psi[procid][1];
	1141	t2b = (double **) work2[procid];
	1142	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1143	t2a[0][0] = t2a[0][0] + timst * t2b[0][0];
	1144	}
	1145	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	1146	t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];
	1147	}
	1148	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1149	t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];
	1150	}
	1151	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	1152	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];
	1153	}
	1154	if (gp[procid].neighbors[UP] == -1) {
	1155	t1a = (double *) t2a[0];
	1156	t1b = (double *) t2b[0];
	1157	for (j = firstcol; j <= lastcol; j++) {
	1158	t1a[j] = t1a[j] + timst * t1b[j];
	1159	}
	1160	}
	1161	if (gp[procid].neighbors[DOWN] == -1) {
	1162	t1a = (double *) t2a[im - 1];
	1163	t1b = (double *) t2b[im - 1];
	1164	for (j = firstcol; j <= lastcol; j++) {
	1165	t1a[j] = t1a[j] + timst * t1b[j];
	1166	}
	1167	}
	1168	if (gp[procid].neighbors[LEFT] == -1) {
	1169	for (j = firstrow; j <= lastrow; j++) {
	1170	t2a[j][0] = t2a[j][0] + timst * t2b[j][0];
	1171	}
	1172	}
	1173	if (gp[procid].neighbors[RIGHT] == -1) {
	1174	for (j = firstrow; j <= lastrow; j++) {
	1175	t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];
	1176	}
	1177	}
	1178
	1179	for (i = firstrow; i <= lastrow; i++) {
	1180	t1a = (double *) t2a[i];
	1181	t1b = (double *) t2b[i];
	1182	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	1183	t1a[iindex] = t1a[iindex] + timst * t1b[iindex];
	1184	}
	1185	}
	1186
	1187	END_PHASE(procid, 10);
	1188
	1189	#if defined(MULTIPLE_BARRIERS)
	1190	BARRIER(bars->sl_phase_10, nprocs)
	1191	#else
	1192	BARRIER(bars->barrier, nprocs)
	1193	#endif
	1194	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: