Context Navigation

source: soft/giet_vm/applications/ocean/slave1.C @ 712

Last change on this file since 712 was 598, checked in by guerin, 10 years ago
ocean: fix app broken by r589
File size: 27.1 KB

Rev	Line
[598]	1	/*************************************************************************/
	2	/* */
	3	/* Copyright (c) 1994 Stanford University */
	4	/* */
	5	/* All rights reserved. */
	6	/* */
	7	/* Permission is given to use, copy, and modify this software for any */
	8	/* non-commercial purpose as long as this copyright notice is not */
	9	/* removed. All other uses, including redistribution in whole or in */
	10	/* part, are forbidden without prior written permission. */
	11	/* */
	12	/* This software is provided with absolutely no warranty and no */
	13	/* support. */
	14	/* */
	15	/*************************************************************************/
[581]	16
[598]	17	/* ****************
	18	subroutine slave
	19	**************** */
	20
	21	EXTERN_ENV
	22
	23	#include <stdio.h>
	24	#include <math.h>
	25	#include <stdlib.h>
	26
	27	#include "decs.h"
	28
	29	void slave(long *ptr_procid)
	30	{
	31	long i;
	32	long j;
	33	long nstep;
	34	long iindex;
	35	long iday;
	36	double ysca1;
	37	double y;
	38	double factor;
	39	double sintemp;
	40	double curlt;
	41	double ressqr;
	42	long istart;
	43	long iend;
	44	long jstart;
	45	long jend;
	46	long ist;
	47	long ien;
	48	long jst;
	49	long jen;
	50	double fac;
	51	long dayflag = 0;
	52	long dhourflag = 0;
	53	long endflag = 0;
	54	long firstrow;
	55	long lastrow;
	56	long numrows;
	57	long firstcol;
	58	long lastcol;
	59	long numcols;
	60	long psiindex;
	61	double psibipriv;
	62	double ttime;
	63	double dhour;
	64	double day;
	65	long procid;
	66	long j_off = 0;
	67	unsigned long t1;
	68	double **t2a;
	69	double **t2b;
	70	double *t1a;
	71	double *t1b;
	72	double *t1c;
	73	double *t1d;
	74
	75
	76	/*
	77	LOCK(locks->idlock)
	78	procid = global->id;
	79	global->id = global->id+1;
	80	UNLOCK(locks->idlock)
	81	*/
	82
	83	procid = *ptr_procid;
	84	ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];
	85
	86	#if defined(MULTIPLE_BARRIERS)
	87	BARRIER(bars->sl_prini, nprocs)
	88	#else
	89	BARRIER(bars->barrier, nprocs)
	90	#endif
	91	/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
	92	processors to avoid migration. */
	93	/* POSSIBLE ENHANCEMENT: Here is where one might distribute
	94	data structures across physically distributed memories as
	95	desired.
	96
	97	One way to do this is as follows. The function allocate(START,SIZE,I)
	98	is assumed to place all addresses x such that
	99	(START <= x < START+SIZE) on node I.
	100
	101	long d_size;
	102	unsigned long g_size;
	103	unsigned long mg_size;
	104
	105	if (procid == MASTER) {
	106	g_size = ((jmx[numlev-1]-2)/xprocs+2)((imx[numlev-1]-2)/yprocs+2)siz
	107	eof(double) +
	108	((imx[numlev-1]-2)/yprocs+2)sizeof(double );
	109
	110	mg_size = numlevsizeof(double *);
	111	for (i=0;i<numlev;i++) {
	112	mg_size+=((imx[i]-2)/yprocs+2)((jmx[i]-2)/xprocs+2)sizeof(double)+
	113	((imx[i]-2)/yprocs+2)sizeof(double );
	114	}
	115	for (i= 0;i<nprocs;i++) {
	116	d_size = 2sizeof(double *);
	117	allocate((unsigned long) psi[i],d_size,i);
	118	allocate((unsigned long) psim[i],d_size,i);
	119	allocate((unsigned long) work1[i],d_size,i);
	120	allocate((unsigned long) work4[i],d_size,i);
	121	allocate((unsigned long) work5[i],d_size,i);
	122	allocate((unsigned long) work7[i],d_size,i);
	123	allocate((unsigned long) temparray[i],d_size,i);
	124	allocate((unsigned long) psi[i][0],g_size,i);
	125	allocate((unsigned long) psi[i][1],g_size,i);
	126	allocate((unsigned long) psim[i][0],g_size,i);
	127	allocate((unsigned long) psim[i][1],g_size,i);
	128	allocate((unsigned long) psium[i],g_size,i);
	129	allocate((unsigned long) psilm[i],g_size,i);
	130	allocate((unsigned long) psib[i],g_size,i);
	131	allocate((unsigned long) ga[i],g_size,i);
	132	allocate((unsigned long) gb[i],g_size,i);
	133	allocate((unsigned long) work1[i][0],g_size,i);
	134	allocate((unsigned long) work1[i][1],g_size,i);
	135	allocate((unsigned long) work2[i],g_size,i);
	136	allocate((unsigned long) work3[i],g_size,i);
	137	allocate((unsigned long) work4[i][0],g_size,i);
	138	allocate((unsigned long) work4[i][1],g_size,i);
	139	allocate((unsigned long) work5[i][0],g_size,i);
	140	allocate((unsigned long) work5[i][1],g_size,i);
	141	allocate((unsigned long) work6[i],g_size,i);
	142	allocate((unsigned long) work7[i][0],g_size,i);
	143	allocate((unsigned long) work7[i][1],g_size,i);
	144	allocate((unsigned long) temparray[i][0],g_size,i);
	145	allocate((unsigned long) temparray[i][1],g_size,i);
	146	allocate((unsigned long) tauz[i],g_size,i);
	147	allocate((unsigned long) oldga[i],g_size,i);
	148	allocate((unsigned long) oldgb[i],g_size,i);
	149	d_size = numlev * sizeof(long);
	150	allocate((unsigned long) gp[i].rel_num_x,d_size,i);
	151	allocate((unsigned long) gp[i].rel_num_y,d_size,i);
	152	allocate((unsigned long) gp[i].eist,d_size,i);
	153	allocate((unsigned long) gp[i].ejst,d_size,i);
	154	allocate((unsigned long) gp[i].oist,d_size,i);
	155	allocate((unsigned long) gp[i].ojst,d_size,i);
	156	allocate((unsigned long) gp[i].rlist,d_size,i);
	157	allocate((unsigned long) gp[i].rljst,d_size,i);
	158	allocate((unsigned long) gp[i].rlien,d_size,i);
	159	allocate((unsigned long) gp[i].rljen,d_size,i);
	160
	161	allocate((unsigned long) q_multi[i],mg_size,i);
	162	allocate((unsigned long) rhs_multi[i],mg_size,i);
	163	allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
	164	}
	165	}
	166
	167	*/
	168	t2a = (double **) oldga[procid];
	169	t2b = (double **) oldgb[procid];
	170	for (i = 0; i < im; i++) {
	171	t1a = (double *) t2a[i];
	172	t1b = (double *) t2b[i];
	173	for (j = 0; j < jm; j++) {
	174	t1a[j] = 0.0;
	175	t1b[j] = 0.0;
	176	}
	177	}
	178
	179	firstcol = 1;
	180	lastcol = firstcol + gp[procid].rel_num_x[numlev - 1] - 1;
	181	firstrow = 1;
	182	lastrow = firstrow + gp[procid].rel_num_y[numlev - 1] - 1;
	183	numcols = gp[procid].rel_num_x[numlev - 1];
	184	numrows = gp[procid].rel_num_y[numlev - 1];
	185	j_off = (gp[procid].colnum) numcols;
	186
	187	/*
	188	if (procid > nprocs/2) {
	189	psinum = 2;
	190	} else {
	191	psinum = 1;
	192	}
	193	*/
	194
	195	/* every process gets its own copy of the timing variables to avoid
	196	contention at shared memory locations. here, these variables
	197	are initialized. */
	198
	199	ttime = 0.0;
	200	dhour = 0.0;
	201	nstep = 0;
	202	day = 0.0;
	203
	204	ysca1 = 0.5 * ysca;
	205
	206	if (*gp[procid].lpid == MASTER) {
	207
	208	f = (double ) G_MALLOC(oim sizeof(double), procid);
	209
	210	t1a = (double *) f;
	211	for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) {
	212	y = ((double) iindex) * res;
	213	t1a[iindex] = f0 + beta * (y - ysca1);
	214	}
	215	}
	216
	217	t2a = (double **) psium[procid];
	218	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	219	t2a[0][0] = 0.0;
	220	}
	221	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	222	t2a[im - 1][0] = 0.0;
	223	}
	224	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	225	t2a[0][jm - 1] = 0.0;
	226	}
	227	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	228	t2a[im - 1][jm - 1] = 0.0;
	229	}
	230	if (gp[procid].neighbors[UP] == -1) {
	231	t1a = (double *) t2a[0];
	232	for (j = firstcol; j <= lastcol; j++) {
	233	t1a[j] = 0.0;
	234	}
	235	}
	236	if (gp[procid].neighbors[DOWN] == -1) {
	237	t1a = (double *) t2a[im - 1];
	238	for (j = firstcol; j <= lastcol; j++) {
	239	t1a[j] = 0.0;
	240	}
	241	}
	242	if (gp[procid].neighbors[LEFT] == -1) {
	243	for (j = firstrow; j <= lastrow; j++) {
	244	t2a[j][0] = 0.0;
	245	}
	246	}
	247	if (gp[procid].neighbors[RIGHT] == -1) {
	248	for (j = firstrow; j <= lastrow; j++) {
	249	t2a[j][jm - 1] = 0.0;
	250	}
	251	}
	252
	253	for (i = firstrow; i <= lastrow; i++) {
	254	t1a = (double *) t2a[i];
	255	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	256	t1a[iindex] = 0.0;
	257	}
	258	}
	259	t2a = (double **) psilm[procid];
	260	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	261	t2a[0][0] = 0.0;
	262	}
	263	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	264	t2a[im - 1][0] = 0.0;
	265	}
	266	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	267	t2a[0][jm - 1] = 0.0;
	268	}
	269	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	270	t2a[im - 1][jm - 1] = 0.0;
	271	}
	272	if (gp[procid].neighbors[UP] == -1) {
	273	t1a = (double *) t2a[0];
	274	for (j = firstcol; j <= lastcol; j++) {
	275	t1a[j] = 0.0;
	276	}
	277	}
	278	if (gp[procid].neighbors[DOWN] == -1) {
	279	t1a = (double *) t2a[im - 1];
	280	for (j = firstcol; j <= lastcol; j++) {
	281	t1a[j] = 0.0;
	282	}
	283	}
	284	if (gp[procid].neighbors[LEFT] == -1) {
	285	for (j = firstrow; j <= lastrow; j++) {
	286	t2a[j][0] = 0.0;
	287	}
	288	}
	289	if (gp[procid].neighbors[RIGHT] == -1) {
	290	for (j = firstrow; j <= lastrow; j++) {
	291	t2a[j][jm - 1] = 0.0;
	292	}
	293	}
	294	for (i = firstrow; i <= lastrow; i++) {
	295	t1a = (double *) t2a[i];
	296	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	297	t1a[iindex] = 0.0;
	298	}
	299	}
	300
	301	t2a = (double **) psib[procid];
	302	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	303	t2a[0][0] = 1.0;
	304	}
	305	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	306	t2a[0][jm - 1] = 1.0;
	307	}
	308	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	309	t2a[im - 1][0] = 1.0;
	310	}
	311	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	312	t2a[im - 1][jm - 1] = 1.0;
	313	}
	314	if (gp[procid].neighbors[UP] == -1) {
	315	t1a = (double *) t2a[0];
	316	for (j = firstcol; j <= lastcol; j++) {
	317	t1a[j] = 1.0;
	318	}
	319	}
	320	if (gp[procid].neighbors[DOWN] == -1) {
	321	t1a = (double *) t2a[im - 1];
	322	for (j = firstcol; j <= lastcol; j++) {
	323	t1a[j] = 1.0;
	324	}
	325	}
	326	if (gp[procid].neighbors[LEFT] == -1) {
	327	for (j = firstrow; j <= lastrow; j++) {
	328	t2a[j][0] = 1.0;
	329	}
	330	}
	331	if (gp[procid].neighbors[RIGHT] == -1) {
	332	for (j = firstrow; j <= lastrow; j++) {
	333	t2a[j][jm - 1] = 1.0;
	334	}
	335	}
	336	for (i = firstrow; i <= lastrow; i++) {
	337	t1a = (double *) t2a[i];
	338	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	339	t1a[iindex] = 0.0;
	340	}
	341	}
	342
	343	/* wait until all processes have completed the above initialization */
	344	#if defined(MULTIPLE_BARRIERS)
	345	BARRIER(bars->sl_prini, nprocs)
	346	#else
	347	BARRIER(bars->barrier, nprocs)
	348	#endif
	349	/* compute psib array (one-time computation) and integrate into psibi */
	350	istart = 1;
	351	iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;
	352	jstart = 1;
	353	jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;
	354	ist = istart;
	355	ien = iend;
	356	jst = jstart;
	357	jen = jend;
	358
	359	if (gp[procid].neighbors[UP] == -1) {
	360	istart = 0;
	361	}
	362	if (gp[procid].neighbors[LEFT] == -1) {
	363	jstart = 0;
	364	}
	365	if (gp[procid].neighbors[DOWN] == -1) {
	366	iend = im - 1;
	367	}
	368	if (gp[procid].neighbors[RIGHT] == -1) {
	369	jend = jm - 1;
	370	}
	371
	372	t2a = (double **) rhs_multi[procid][numlev - 1];
	373	t2b = (double **) psib[procid];
	374	for (i = istart; i <= iend; i++) {
	375	t1a = (double *) t2a[i];
	376	t1b = (double *) t2b[i];
	377	for (j = jstart; j <= jend; j++) {
	378	t1a[j] = t1b[j] * ressqr;
	379	}
	380	}
	381	t2a = (double **) q_multi[procid][numlev - 1];
	382	if (gp[procid].neighbors[UP] == -1) {
	383	t1a = (double *) t2a[0];
	384	t1b = (double *) t2b[0];
	385	for (j = jstart; j <= jend; j++) {
	386	t1a[j] = t1b[j];
	387	}
	388	}
	389	if (gp[procid].neighbors[DOWN] == -1) {
	390	t1a = (double *) t2a[im - 1];
	391	t1b = (double *) t2b[im - 1];
	392	for (j = jstart; j <= jend; j++) {
	393	t1a[j] = t1b[j];
	394	}
	395	}
	396	if (gp[procid].neighbors[LEFT] == -1) {
	397	for (i = istart; i <= iend; i++) {
	398	t2a[i][0] = t2b[i][0];
	399	}
	400	}
	401	if (gp[procid].neighbors[RIGHT] == -1) {
	402	for (i = istart; i <= iend; i++) {
	403	t2a[i][jm - 1] = t2b[i][jm - 1];
	404	}
	405	}
	406
	407	#if defined(MULTIPLE_BARRIERS)
	408	BARRIER(bars->sl_psini, nprocs)
	409	#else
	410	BARRIER(bars->barrier, nprocs)
	411	#endif
	412
	413	t2a = (double **) psib[procid];
	414	j = gp[procid].neighbors[UP];
	415	if (j != -1) {
	416	t1a = (double *) t2a[0];
	417	t1b = (double *) psib[j][im - 2];
	418	for (i = 1; i < jm - 1; i++) {
	419	t1a[i] = t1b[i];
	420	}
	421	}
	422	j = gp[procid].neighbors[DOWN];
	423	if (j != -1) {
	424	t1a = (double *) t2a[im - 1];
	425	t1b = (double *) psib[j][1];
	426	for (i = 1; i < jm - 1; i++) {
	427	t1a[i] = t1b[i];
	428	}
	429	}
	430	j = gp[procid].neighbors[LEFT];
	431	if (j != -1) {
	432	t2b = (double **) psib[j];
	433	for (i = 1; i < im - 1; i++) {
	434	t2a[i][0] = t2b[i][jm - 2];
	435	}
	436	}
	437	j = gp[procid].neighbors[RIGHT];
	438	if (j != -1) {
	439	t2b = (double **) psib[j];
	440	for (i = 1; i < im - 1; i++) {
	441	t2a[i][jm - 1] = t2b[i][1];
	442	}
	443	}
	444
	445	t2a = (double **) q_multi[procid][numlev - 1];
	446	t2b = (double **) psib[procid];
	447	fac = 1.0 / (4.0 - ressqr * eig2);
	448	for (i = ist; i <= ien; i++) {
	449	t1a = (double *) t2a[i];
	450	t1b = (double *) t2b[i];
	451	t1c = (double *) t2b[i - 1];
	452	t1d = (double *) t2b[i + 1];
	453	for (j = jst; j <= jen; j++) {
	454	t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]);
	455	}
	456	}
	457
	458	multig(procid);
	459
	460	for (i = istart; i <= iend; i++) {
	461	t1a = (double *) t2a[i];
	462	t1b = (double *) t2b[i];
	463	for (j = jstart; j <= jend; j++) {
	464	t1b[j] = t1a[j];
	465	}
	466	}
	467
	468	#if defined(MULTIPLE_BARRIERS)
	469	BARRIER(bars->sl_prini, nprocs)
	470	#else
	471	BARRIER(bars->barrier, nprocs)
	472	#endif
	473
	474	/* update the local running sum psibipriv by summing all the resulting
	475	values in that process's share of the psib matrix */
	476
	477	t2a = (double **) psib[procid];
	478	psibipriv = 0.0;
	479	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	480	psibipriv = psibipriv + 0.25 * (t2a[0][0]);
	481	}
	482	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	483	psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]);
	484	}
	485	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	486	psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]);
	487	}
	488	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	489	psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]);
	490	}
	491	if (gp[procid].neighbors[UP] == -1) {
	492	t1a = (double *) t2a[0];
	493	for (j = firstcol; j <= lastcol; j++) {
	494	psibipriv = psibipriv + 0.5 * t1a[j];
	495	}
	496	}
	497	if (gp[procid].neighbors[DOWN] == -1) {
	498	t1a = (double *) t2a[im - 1];
	499	for (j = firstcol; j <= lastcol; j++) {
	500	psibipriv = psibipriv + 0.5 * t1a[j];
	501	}
	502	}
	503	if (gp[procid].neighbors[LEFT] == -1) {
	504	for (j = firstrow; j <= lastrow; j++) {
	505	psibipriv = psibipriv + 0.5 * t2a[j][0];
	506	}
	507	}
	508	if (gp[procid].neighbors[RIGHT] == -1) {
	509	for (j = firstrow; j <= lastrow; j++) {
	510	psibipriv = psibipriv + 0.5 * t2a[j][jm - 1];
	511	}
	512	}
	513	for (i = firstrow; i <= lastrow; i++) {
	514	t1a = (double *) t2a[i];
	515	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	516	psibipriv = psibipriv + t1a[iindex];
	517	}
	518	}
	519
	520	/* update the shared variable psibi by summing all the psibiprivs
	521	of the individual processes into it. note that this combined
	522	private and shared sum method avoids accessing the shared
	523	variable psibi once for every element of the matrix. */
	524
	525	LOCK(locks->psibilock);
	526	global->psibi = global->psibi + psibipriv;
	527	UNLOCK(locks->psibilock);
	528
	529	/* initialize psim matrices
	530
	531	if there is more than one process, then split the processes
	532	between the two psim matrices; otherwise, let the single process
	533	work on one first and then the other */
	534
	535	for (psiindex = 0; psiindex <= 1; psiindex++) {
	536	t2a = (double **) psim[procid][psiindex];
	537	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	538	t2a[0][0] = 0.0;
	539	}
	540	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	541	t2a[im - 1][0] = 0.0;
	542	}
	543	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	544	t2a[0][jm - 1] = 0.0;
	545	}
	546	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	547	t2a[im - 1][jm - 1] = 0.0;
	548	}
	549	if (gp[procid].neighbors[UP] == -1) {
	550	t1a = (double *) t2a[0];
	551	for (j = firstcol; j <= lastcol; j++) {
	552	t1a[j] = 0.0;
	553	}
	554	}
	555	if (gp[procid].neighbors[DOWN] == -1) {
	556	t1a = (double *) t2a[im - 1];
	557	for (j = firstcol; j <= lastcol; j++) {
	558	t1a[j] = 0.0;
	559	}
	560	}
	561	if (gp[procid].neighbors[LEFT] == -1) {
	562	for (j = firstrow; j <= lastrow; j++) {
	563	t2a[j][0] = 0.0;
	564	}
	565	}
	566	if (gp[procid].neighbors[RIGHT] == -1) {
	567	for (j = firstrow; j <= lastrow; j++) {
	568	t2a[j][jm - 1] = 0.0;
	569	}
	570	}
	571	for (i = firstrow; i <= lastrow; i++) {
	572	t1a = (double *) t2a[i];
	573	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	574	t1a[iindex] = 0.0;
	575	}
	576	}
	577	}
	578
	579	/* initialize psi matrices the same way */
	580
	581	for (psiindex = 0; psiindex <= 1; psiindex++) {
	582	t2a = (double **) psi[procid][psiindex];
	583	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	584	t2a[0][0] = 0.0;
	585	}
	586	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	587	t2a[0][jm - 1] = 0.0;
	588	}
	589	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	590	t2a[im - 1][0] = 0.0;
	591	}
	592	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	593	t2a[im - 1][jm - 1] = 0.0;
	594	}
	595	if (gp[procid].neighbors[UP] == -1) {
	596	t1a = (double *) t2a[0];
	597	for (j = firstcol; j <= lastcol; j++) {
	598	t1a[j] = 0.0;
	599	}
	600	}
	601	if (gp[procid].neighbors[DOWN] == -1) {
	602	t1a = (double *) t2a[im - 1];
	603	for (j = firstcol; j <= lastcol; j++) {
	604	t1a[j] = 0.0;
	605	}
	606	}
	607	if (gp[procid].neighbors[LEFT] == -1) {
	608	for (j = firstrow; j <= lastrow; j++) {
	609	t2a[j][0] = 0.0;
	610	}
	611	}
	612	if (gp[procid].neighbors[RIGHT] == -1) {
	613	for (j = firstrow; j <= lastrow; j++) {
	614	t2a[j][jm - 1] = 0.0;
	615	}
	616	}
	617	for (i = firstrow; i <= lastrow; i++) {
	618	t1a = (double *) t2a[i];
	619	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	620	t1a[iindex] = 0.0;
	621	}
	622	}
	623	}
	624
	625	/* compute input curl of wind stress */
	626
	627
	628	t2a = (double **) tauz[procid];
	629	ysca1 = .5 * ysca;
	630	factor = -t0 * pi / ysca1;
	631	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	632	t2a[0][0] = 0.0;
	633	}
	634	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	635	t2a[im - 1][0] = 0.0;
	636	}
	637	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	638	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
	639	sintemp = sin(sintemp);
	640	t2a[0][jm - 1] = factor * sintemp;
	641	}
	642	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	643	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
	644	sintemp = sin(sintemp);
	645	t2a[im - 1][jm - 1] = factor * sintemp;
	646	}
	647	if (gp[procid].neighbors[UP] == -1) {
	648	t1a = (double *) t2a[0];
	649	for (j = firstcol; j <= lastcol; j++) {
	650	sintemp = pi * ((double) j + j_off) * res / ysca1;
	651	sintemp = sin(sintemp);
	652	curlt = factor * sintemp;
	653	t1a[j] = curlt;
	654	}
	655	}
	656	if (gp[procid].neighbors[DOWN] == -1) {
	657	t1a = (double *) t2a[im - 1];
	658	for (j = firstcol; j <= lastcol; j++) {
	659	sintemp = pi * ((double) j + j_off) * res / ysca1;
	660	sintemp = sin(sintemp);
	661	curlt = factor * sintemp;
	662	t1a[j] = curlt;
	663	}
	664	}
	665	if (gp[procid].neighbors[LEFT] == -1) {
	666	for (j = firstrow; j <= lastrow; j++) {
	667	t2a[j][0] = 0.0;
	668	}
	669	}
	670	if (gp[procid].neighbors[RIGHT] == -1) {
	671	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
	672	sintemp = sin(sintemp);
	673	curlt = factor * sintemp;
	674	for (j = firstrow; j <= lastrow; j++) {
	675	t2a[j][jm - 1] = curlt;
	676	}
	677	}
	678	for (i = firstrow; i <= lastrow; i++) {
	679	t1a = (double *) t2a[i];
	680	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	681	sintemp = pi * ((double) iindex + j_off) * res / ysca1;
	682	sintemp = sin(sintemp);
	683	curlt = factor * sintemp;
	684	t1a[iindex] = curlt;
	685	}
	686	}
	687
	688	#if defined(MULTIPLE_BARRIERS)
	689	BARRIER(bars->sl_onetime, nprocs)
	690	#else
	691	BARRIER(bars->barrier, nprocs)
	692	#endif
	693
	694	/***************************************************************
	695	one-time stuff over at this point
	696	***************************************************************/
	697	while (!endflag) {
	698	while ((!dayflag) \|\| (!dhourflag)) {
	699	dayflag = 0;
	700	dhourflag = 0;
	701	if (nstep == 1) {
	702	for (i = 0; i < 10; i++) {
	703	gp[procid].steps_time[i] = 0;
	704	}
	705	if (procid == MASTER) {
	706	CLOCK(global->trackstart)
	707	}
	708	if ((procid == MASTER) \|\| (do_stats)) {
	709	CLOCK(t1);
	710	(*gp[procid].total_time) = t1;
	711	(*gp[procid].multi_time) = 0;
	712	}
	713	/* POSSIBLE ENHANCEMENT: Here is where one might reset the
	714	statistics that one is measuring about the parallel execution */
	715	}
	716
	717	slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols);
	718
	719	/* update time and step number
	720	note that these time and step variables are private i.e. every
	721	process has its own copy and keeps track of its own time */
	722
	723	ttime = ttime + dtau;
	724	nstep = nstep + 1;
	725	day = ttime / 86400.0;
	726
	727	if (day > ((double) outday0)) {
	728	dayflag = 1;
	729	iday = (long) day;
	730	dhour = dhour + dtau;
	731	if (dhour >= 86400.0) {
	732	dhourflag = 1;
	733	}
	734	}
	735	}
	736	dhour = 0.0;
	737
	738	t2a = (double **) psium[procid];
	739	t2b = (double **) psim[procid][0];
	740	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	741	t2a[0][0] = t2a[0][0] + t2b[0][0];
	742	}
	743	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	744	t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
	745	}
	746	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	747	t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
	748	}
	749	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	750	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
	751	}
	752	if (gp[procid].neighbors[UP] == -1) {
	753	t1a = (double *) t2a[0];
	754	t1b = (double *) t2b[0];
	755	for (j = firstcol; j <= lastcol; j++) {
	756	t1a[j] = t1a[j] + t1b[j];
	757	}
	758	}
	759	if (gp[procid].neighbors[DOWN] == -1) {
	760	t1a = (double *) t2a[im - 1];
	761	t1b = (double *) t2b[im - 1];
	762	for (j = firstcol; j <= lastcol; j++) {
	763	t1a[j] = t1a[j] + t1b[j];
	764	}
	765	}
	766	if (gp[procid].neighbors[LEFT] == -1) {
	767	for (j = firstrow; j <= lastrow; j++) {
	768	t2a[j][0] = t2a[j][0] + t2b[j][0];
	769	}
	770	}
	771	if (gp[procid].neighbors[RIGHT] == -1) {
	772	for (j = firstrow; j <= lastrow; j++) {
	773	t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
	774	}
	775	}
	776	for (i = firstrow; i <= lastrow; i++) {
	777	t1a = (double *) t2a[i];
	778	t1b = (double *) t2b[i];
	779	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	780	t1a[iindex] = t1a[iindex] + t1b[iindex];
	781	}
	782	}
	783
	784	/* update values of psilm array to psilm + psim[2] */
	785
	786	t2a = (double **) psilm[procid];
	787	t2b = (double **) psim[procid][1];
	788	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	789	t2a[0][0] = t2a[0][0] + t2b[0][0];
	790	}
	791	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
	792	t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
	793	}
	794	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	795	t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
	796	}
	797	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
	798	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
	799	}
	800	if (gp[procid].neighbors[UP] == -1) {
	801	t1a = (double *) t2a[0];
	802	t1b = (double *) t2b[0];
	803	for (j = firstcol; j <= lastcol; j++) {
	804	t1a[j] = t1a[j] + t1b[j];
	805	}
	806	}
	807	if (gp[procid].neighbors[DOWN] == -1) {
	808	t1a = (double *) t2a[im - 1];
	809	t1b = (double *) t2b[im - 1];
	810	for (j = firstcol; j <= lastcol; j++) {
	811	t1a[j] = t1a[j] + t1b[j];
	812	}
	813	}
	814	if (gp[procid].neighbors[LEFT] == -1) {
	815	for (j = firstrow; j <= lastrow; j++) {
	816	t2a[j][0] = t2a[j][0] + t2b[j][0];
	817	}
	818	}
	819	if (gp[procid].neighbors[RIGHT] == -1) {
	820	for (j = firstrow; j <= lastrow; j++) {
	821	t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
	822	}
	823	}
	824	for (i = firstrow; i <= lastrow; i++) {
	825	t1a = (double *) t2a[i];
	826	t1b = (double *) t2b[i];
	827	for (iindex = firstcol; iindex <= lastcol; iindex++) {
	828	t1a[iindex] = t1a[iindex] + t1b[iindex];
	829	}
	830	}
	831	if (iday >= (long) outday3) {
	832	endflag = 1;
	833	}
	834	}
	835	if ((procid == MASTER) \|\| (do_stats)) {
	836	CLOCK(t1);
	837	(gp[procid].total_time) = t1 - (gp[procid].total_time);
	838	}
	839	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: